1 Importing Datasets

1.1 Total PM2.5 Files

Here I am importing the file which contains global DNAm estimates by ELISA-based MethylFlash that are linked to the patient ID.

outfile1 <- here("PFF_Results/PFF_MethylFlash_ExptID_1-751_Combined_2022_05_05.xlsx")
dnam <- read_excel(outfile1, sheet="CalculatedValues")

Match up PM25 data with methylation data

outfile2 <- here("PM25_Data/Final_PFF_fILD_PM_BaselineData_2022_02_12.xlsx")
PM <- read_excel(outfile2)

1.2 Keep only required columns

str(dnam)
## tibble [751 × 12] (S3: tbl_df/tbl/data.frame)
##  $ SSID          : chr [1:751] "11R4536" "04R1356" "10R4092" "12R5016" ...
##  $ sampID        : num [1:751] 79012226 79011626 79090026 79014826 79013526 ...
##  $ exptID        : num [1:751] 1 2 3 4 5 6 7 8 9 10 ...
##  $ run_date      : POSIXct[1:751], format: "2022-04-06" "2022-04-06" ...
##  $ plate         : num [1:751] 1 1 1 1 1 1 1 1 1 1 ...
##  $ well          : chr [1:751] "G2" "A3" "C3" "E3" ...
##  $ raw_mean      : num [1:751] 0.1109 0.0711 0.1041 0.0822 0.0923 ...
##  $ raw_stdev     : num [1:751] 0.00545 0.0017 0.01407 0.00325 0.01103 ...
##  $ coef_var      : num [1:751] 0.0491 0.0239 0.1352 0.0396 0.1195 ...
##  $ calc_slope    : num [1:751] 0.0935 0.0935 0.0935 0.0935 0.0935 ...
##  $ calc_intercept: num [1:751] 0.335 0.335 0.335 0.335 0.335 ...
##  $ pct_5mC       : num [1:751] 0.0905 0.0592 0.0842 0.0666 0.0742 ...
dnam <- dnam %>% dplyr::select(SSID, sampID, exptID, run_date, plate, pct_5mC)

1.3 Join Columns

dnam <- left_join(dnam, PM, by="SSID")

2 Add Original PM2.5 Matching Files with All Months

outfile2b <- here("PM25_Data/PFF_fILD_2000_2018_PM25_2021_10_08.xlsx")
PM <- read_excel(outfile2b)
PM <- PM %>% rename("SSID"="ID")
dnam <- inner_join(PM, dnam, by="SSID")

Reorder so “ID” is the first column

dnam <- dnam %>% dplyr::select(ID, everything(.))
dnam <- dnam %>% dplyr::select(!c(PM_date, value))

2.1 Pivoting to Long Format

First we need to convert the PM dataframe into the long rather than the wide format, which will allow us to use it more easily in R’s tidyverse as this is “tidy” formatting.

dnam <- dnam %>% 
  pivot_longer(cols=c(7:234), names_to="PM_date", names_prefix="PM25_")

2.2 Convert date PM_date to same format as above

dnamx <- dnam 
dnamx$PM_date <- gsub("jan", "01-01-20", dnamx$PM_date)
dnamx$PM_date <- gsub("feb", "01-02-20", dnamx$PM_date)
dnamx$PM_date <- gsub("mar", "01-03-20", dnamx$PM_date)
dnamx$PM_date <- gsub("apr", "01-04-20", dnamx$PM_date)
dnamx$PM_date <- gsub("may", "01-05-20", dnamx$PM_date)
dnamx$PM_date <- gsub("jun", "01-06-20", dnamx$PM_date)
dnamx$PM_date <- gsub("jul", "01-07-20", dnamx$PM_date)
dnamx$PM_date <- gsub("aug", "01-08-20", dnamx$PM_date)
dnamx$PM_date <- gsub("sep", "01-09-20", dnamx$PM_date)
dnamx$PM_date <- gsub("oct", "01-10-20", dnamx$PM_date)
dnamx$PM_date <- gsub("nov", "01-11-20", dnamx$PM_date)
dnamx$PM_date <- gsub("dec", "01-12-20", dnamx$PM_date)

dnamx$PM_date <- format(as.Date(dnamx$PM_date, format="%d-%m-%Y"),"%Y-%m-%d")
dnamx$PM_date <- as.Date(dnamx$PM_date)
dnam <- dnamx

2.3 Convert Date Columns to Date Format

Next I need to convert all date columns to proper format

dnam <- dnam %>% 
  mutate_at(c("run_date", "dx_date", "consent_date", "death_date", "tx_date", "sample_date", "fvc_date", "dlco_date", "censor_date", "deathORtx_date", "DeathTxCensor_date", "PM_date"), as.Date)
str(dnam)
## tibble [171,228 × 63] (S3: tbl_df/tbl/data.frame)
##  $ ID                : num [1:171228] 513 513 513 513 513 513 513 513 513 513 ...
##  $ nrow.x            : num [1:171228] 27492333 27492333 27492333 27492333 27492333 ...
##  $ dist.x            : num [1:171228] 0.00707 0.00707 0.00707 0.00707 0.00707 ...
##  $ SSID              : chr [1:171228] "02R0456" "02R0456" "02R0456" "02R0456" ...
##  $ lon               : num [1:171228] -123 -123 -123 -123 -123 ...
##  $ lat               : num [1:171228] 38.4 38.4 38.4 38.4 38.4 ...
##  $ sampID            : num [1:171228] 77900524 77900524 77900524 77900524 77900524 ...
##  $ exptID            : num [1:171228] 682 682 682 682 682 682 682 682 682 682 ...
##  $ run_date          : Date[1:171228], format: "2022-04-25" "2022-04-25" ...
##  $ plate             : num [1:171228] 17 17 17 17 17 17 17 17 17 17 ...
##  $ pct_5mC           : num [1:171228] 0.105 0.105 0.105 0.105 0.105 ...
##  $ sex               : chr [1:171228] "Male" "Male" "Male" "Male" ...
##  $ race              : chr [1:171228] "W" "W" "W" "W" ...
##  $ dich_Race         : chr [1:171228] "White" "White" "White" "White" ...
##  $ ethnicity         : chr [1:171228] "N" "N" "N" "N" ...
##  $ smokeHx           : chr [1:171228] "Ever" "Ever" "Ever" "Ever" ...
##  $ age_dx            : num [1:171228] 74 74 74 74 74 ...
##  $ status            : chr [1:171228] "1" "1" "1" "1" ...
##  $ deadORtx          : num [1:171228] 1 1 1 1 1 1 1 1 1 1 ...
##  $ dx                : chr [1:171228] "IPF" "IPF" "IPF" "IPF" ...
##  $ dx_group          : chr [1:171228] "IPF" "IPF" "IPF" "IPF" ...
##  $ dx_date           : Date[1:171228], format: "2016-05-20" "2016-05-20" ...
##  $ consent_date      : Date[1:171228], format: "2016-05-19" "2016-05-19" ...
##  $ censor_date       : Date[1:171228], format: "2016-06-11" "2016-06-11" ...
##  $ tx_date           : Date[1:171228], format: NA NA ...
##  $ death_date        : Date[1:171228], format: "2016-06-11" "2016-06-11" ...
##  $ deathORtx_date    : Date[1:171228], format: "2016-06-11" "2016-06-11" ...
##  $ DeathTxCensor_date: Date[1:171228], format: "2016-06-11" "2016-06-11" ...
##  $ sample_date       : Date[1:171228], format: "2016-05-19" "2016-05-19" ...
##  $ fvc_date          : Date[1:171228], format: "2016-03-17" "2016-03-17" ...
##  $ dlco_date         : Date[1:171228], format: "2016-03-17" "2016-03-17" ...
##  $ fvc_pct           : num [1:171228] 58.5 58.5 58.5 58.5 58.5 ...
##  $ dlco_pct          : num [1:171228] 28.8 28.8 28.8 28.8 28.8 ...
##  $ fvc_timefromdx    : num [1:171228] -0.175 -0.175 -0.175 -0.175 -0.175 ...
##  $ dlco_timefromdx   : num [1:171228] -0.175 -0.175 -0.175 -0.175 -0.175 ...
##  $ Reason_Termination: chr [1:171228] "Death" "Death" "Death" "Death" ...
##  $ Death_ILD_Related : chr [1:171228] "Yes" "Yes" "Yes" "Yes" ...
##  $ same_zip          : logi [1:171228] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ zip               : chr [1:171228] "95405" "95405" "95405" "95405" ...
##  $ zip_new           : chr [1:171228] "95405" "95405" "95405" "95405" ...
##  $ lat.x             : num [1:171228] 38.4 38.4 38.4 38.4 38.4 ...
##  $ lon.x             : num [1:171228] -123 -123 -123 -123 -123 ...
##  $ state             : chr [1:171228] "CA" "CA" "CA" "CA" ...
##  $ major_city        : chr [1:171228] "Santa Rosa" "Santa Rosa" "Santa Rosa" "Santa Rosa" ...
##  $ time_censoring    : num [1:171228] 0.063 0.063 0.063 0.063 0.063 ...
##  $ time_death        : num [1:171228] 0.063 0.063 0.063 0.063 0.063 ...
##  $ time_tx           : num [1:171228] NA NA NA NA NA NA NA NA NA NA ...
##  $ time_deathORtx    : num [1:171228] 0.063 0.063 0.063 0.063 0.063 ...
##  $ time_DeathTxCensor: num [1:171228] 0.063 0.063 0.063 0.063 0.063 ...
##  $ nrow.y            : num [1:171228] 27492333 27492333 27492333 27492333 27492333 ...
##  $ dist.y            : num [1:171228] 0.00707 0.00707 0.00707 0.00707 0.00707 ...
##  $ lon.y             : num [1:171228] -123 -123 -123 -123 -123 ...
##  $ lat.y             : num [1:171228] 38.4 38.4 38.4 38.4 38.4 ...
##  $ PM_5yrPreCensor   : num [1:171228] 7.62 7.62 7.62 7.62 7.62 ...
##  $ PM_5yrPreDx       : num [1:171228] 7.62 7.62 7.62 7.62 7.62 ...
##  $ PM5yrCensor_dich  : chr [1:171228] "Low" "Low" "Low" "Low" ...
##  $ PM5yr_dich        : chr [1:171228] "Low" "Low" "Low" "Low" ...
##  $ dx_IPF            : chr [1:171228] "IPF" "IPF" "IPF" "IPF" ...
##  $ ruca              : num [1:171228] 1 1 1 1 1 1 1 1 1 1 ...
##  $ metro             : chr [1:171228] "metropolitan" "metropolitan" "metropolitan" "metropolitan" ...
##  $ site              : chr [1:171228] "02R" "02R" "02R" "02R" ...
##  $ PM_date           : Date[1:171228], format: "2000-01-01" "2000-02-01" ...
##  $ value             : num [1:171228] 13 8 6.2 6.5 4.7 ...

2.4 Remove unnecessary columns

dnam <- dnam %>% dplyr::select(!c("nrow.x", "dist.x", "lon", "lat", "lat.x", "lon.x", "nrow.y", "dist.y", "lon.y", "lat.y"))

2.5 Creating PM2.5 Exposure Variables

Now I’m creating new variables where I am matching up PM averages per year to years of major events for patients (year of diagnosis, year of death/lung transplant/censoring, etc)

2.5.1 5yrs Pre-Sampling

Here I am calculating the average PM value in the 5yrs prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(PM_5yrPreSamp = mean(value[ymd(PM_date)>=(ymd(sample_date) - years(5)) & 
                                        ymd(PM_date)<=ymd(sample_date)]))

2.5.2 1yr Pre-Sampling

Here I am calculating the average PM value in the 1yr prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(PM_1yrPreSamp = mean(value[ymd(PM_date)>=(ymd(sample_date) - years(1)) & 
                                        ymd(PM_date)<=ymd(sample_date)]))

2.5.3 6mo Pre-Sampling

Here I am calculating the average PM value in the 6mo prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(PM_6moPreSamp = mean(value[ymd(PM_date)>=(ymd(sample_date) - months(6)) & 
                                        ymd(PM_date)<=ymd(sample_date)]))

2.5.4 3mo Pre-Sampling

Here I am calculating the average PM value in the 3mo prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(PM_3moPreSamp = mean(value[ymd(PM_date)>=(ymd(sample_date) - months(3)) & 
                                        ymd(PM_date)<=ymd(sample_date)]))

2.5.5 1mo Pre-Sampling

Here I am calculating the average PM value in the 1mo prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(PM_1moPreSamp = mean(value[ymd(PM_date)>=(ymd(sample_date) - months(2)) & 
                                        ymd(PM_date)<=ymd(sample_date)]))

Of note, the 1mo pre-sampling with this method actually provides the average PM2.5 value for the month of sampling and the 1mo prior to sampling because the sampling could have occurred at the beginning or end of a month.

2.5.6 Remove Dataframes not in use

rm(list=c("dnamx"))

2.5.7 Getting Rid of Duplicated IDs

Now that we have calculated the patient-specific PM exposures, I can get rid of all rows other than the first row for each patient.

dnam <-  dnam %>% 
  distinct_at(vars(ID), .keep_all=T)

This takes us down to 734 observations.

2.5.8 Get rid of non-specific “value” column

dnam <- dnam %>% dplyr::select(!value)

2.5.9 Remove UPitt patients

dnam <- dnam %>% filter(!str_detect(SSID, "^08R"))

This takes us down to 733 observations after we remove UPitt patients which may be doubled up in our Simmons analyses.

3 Add PM2.5 Constituent Files

3.1 SO4 add Original SO4 Matching Files with All Months

outfile3 <- here("PM25_Data/PFF_fILD_2000_2017_SO4_2021_11_05.xlsx")
SO4 <- read_excel(outfile3)
dnam <- inner_join(SO4, dnam, by="ID")

Reorder so “ID” is the first column

dnam <- dnam %>% dplyr::select(ID, everything(.))

3.1.1 Pivoting to Long Format

First we need to convert the PM dataframe into the long rather than the wide format, which will allow us to use it more easily in R’s tidyverse as this is “tidy” formatting.

dnam <- dnam %>% 
  pivot_longer(cols=c(6:221), names_to="SO4_date", names_prefix="SO4_", names_repair = "minimal")

3.1.2 Convert date PM_date to same format as above

dnamx <- dnam 
dnamx$SO4_date <- gsub("jan", "01-01-20", dnamx$SO4_date)
dnamx$SO4_date <- gsub("feb", "01-02-20", dnamx$SO4_date)
dnamx$SO4_date <- gsub("mar", "01-03-20", dnamx$SO4_date)
dnamx$SO4_date <- gsub("apr", "01-04-20", dnamx$SO4_date)
dnamx$SO4_date <- gsub("may", "01-05-20", dnamx$SO4_date)
dnamx$SO4_date <- gsub("jun", "01-06-20", dnamx$SO4_date)
dnamx$SO4_date <- gsub("jul", "01-07-20", dnamx$SO4_date)
dnamx$SO4_date <- gsub("aug", "01-08-20", dnamx$SO4_date)
dnamx$SO4_date <- gsub("sep", "01-09-20", dnamx$SO4_date)
dnamx$SO4_date <- gsub("oct", "01-10-20", dnamx$SO4_date)
dnamx$SO4_date <- gsub("nov", "01-11-20", dnamx$SO4_date)
dnamx$SO4_date <- gsub("dec", "01-12-20", dnamx$SO4_date)

dnamx$SO4_date <- format(as.Date(dnamx$SO4_date, format="%d-%m-%Y"),"%Y-%m-%d")
dnamx$SO4_date <- as.Date(dnamx$SO4_date)
dnam <- dnamx

4 Convert Date Columns to Date Format

Next I need to convert all date columns to proper format

dnam <- dnam %>% 
  mutate_at(c("SO4_date"), as.Date)
str(dnam)
## tibble [161,136 × 63] (S3: tbl_df/tbl/data.frame)
##  $ ID                : num [1:161136] 513 513 513 513 513 513 513 513 513 513 ...
##  $ nrow              : num [1:161136] 27492333 27492333 27492333 27492333 27492333 ...
##  $ dist              : num [1:161136] 0.00707 0.00707 0.00707 0.00707 0.00707 ...
##  $ lon               : num [1:161136] -123 -123 -123 -123 -123 ...
##  $ lat               : num [1:161136] 38.4 38.4 38.4 38.4 38.4 ...
##  $ SSID              : chr [1:161136] "02R0456" "02R0456" "02R0456" "02R0456" ...
##  $ sampID            : num [1:161136] 77900524 77900524 77900524 77900524 77900524 ...
##  $ exptID            : num [1:161136] 682 682 682 682 682 682 682 682 682 682 ...
##  $ run_date          : Date[1:161136], format: "2022-04-25" "2022-04-25" ...
##  $ plate             : num [1:161136] 17 17 17 17 17 17 17 17 17 17 ...
##  $ pct_5mC           : num [1:161136] 0.105 0.105 0.105 0.105 0.105 ...
##  $ sex               : chr [1:161136] "Male" "Male" "Male" "Male" ...
##  $ race              : chr [1:161136] "W" "W" "W" "W" ...
##  $ dich_Race         : chr [1:161136] "White" "White" "White" "White" ...
##  $ ethnicity         : chr [1:161136] "N" "N" "N" "N" ...
##  $ smokeHx           : chr [1:161136] "Ever" "Ever" "Ever" "Ever" ...
##  $ age_dx            : num [1:161136] 74 74 74 74 74 ...
##  $ status            : chr [1:161136] "1" "1" "1" "1" ...
##  $ deadORtx          : num [1:161136] 1 1 1 1 1 1 1 1 1 1 ...
##  $ dx                : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
##  $ dx_group          : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
##  $ dx_date           : Date[1:161136], format: "2016-05-20" "2016-05-20" ...
##  $ consent_date      : Date[1:161136], format: "2016-05-19" "2016-05-19" ...
##  $ censor_date       : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
##  $ tx_date           : Date[1:161136], format: NA NA ...
##  $ death_date        : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
##  $ deathORtx_date    : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
##  $ DeathTxCensor_date: Date[1:161136], format: "2016-06-11" "2016-06-11" ...
##  $ sample_date       : Date[1:161136], format: "2016-05-19" "2016-05-19" ...
##  $ fvc_date          : Date[1:161136], format: "2016-03-17" "2016-03-17" ...
##  $ dlco_date         : Date[1:161136], format: "2016-03-17" "2016-03-17" ...
##  $ fvc_pct           : num [1:161136] 58.5 58.5 58.5 58.5 58.5 ...
##  $ dlco_pct          : num [1:161136] 28.8 28.8 28.8 28.8 28.8 ...
##  $ fvc_timefromdx    : num [1:161136] -0.175 -0.175 -0.175 -0.175 -0.175 ...
##  $ dlco_timefromdx   : num [1:161136] -0.175 -0.175 -0.175 -0.175 -0.175 ...
##  $ Reason_Termination: chr [1:161136] "Death" "Death" "Death" "Death" ...
##  $ Death_ILD_Related : chr [1:161136] "Yes" "Yes" "Yes" "Yes" ...
##  $ same_zip          : logi [1:161136] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ zip               : chr [1:161136] "95405" "95405" "95405" "95405" ...
##  $ zip_new           : chr [1:161136] "95405" "95405" "95405" "95405" ...
##  $ state             : chr [1:161136] "CA" "CA" "CA" "CA" ...
##  $ major_city        : chr [1:161136] "Santa Rosa" "Santa Rosa" "Santa Rosa" "Santa Rosa" ...
##  $ time_censoring    : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
##  $ time_death        : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
##  $ time_tx           : num [1:161136] NA NA NA NA NA NA NA NA NA NA ...
##  $ time_deathORtx    : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
##  $ time_DeathTxCensor: num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
##  $ PM_5yrPreCensor   : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
##  $ PM_5yrPreDx       : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
##  $ PM5yrCensor_dich  : chr [1:161136] "Low" "Low" "Low" "Low" ...
##  $ PM5yr_dich        : chr [1:161136] "Low" "Low" "Low" "Low" ...
##  $ dx_IPF            : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
##  $ ruca              : num [1:161136] 1 1 1 1 1 1 1 1 1 1 ...
##  $ metro             : chr [1:161136] "metropolitan" "metropolitan" "metropolitan" "metropolitan" ...
##  $ site              : chr [1:161136] "02R" "02R" "02R" "02R" ...
##  $ PM_date           : Date[1:161136], format: "2000-01-01" "2000-01-01" ...
##  $ PM_5yrPreSamp     : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
##  $ PM_1yrPreSamp     : num [1:161136] 6.31 6.31 6.31 6.31 6.31 ...
##  $ PM_6moPreSamp     : num [1:161136] 6.6 6.6 6.6 6.6 6.6 ...
##  $ PM_3moPreSamp     : num [1:161136] 5.63 5.63 5.63 5.63 5.63 ...
##  $ PM_1moPreSamp     : num [1:161136] 6.35 6.35 6.35 6.35 6.35 ...
##  $ SO4_date          : Date[1:161136], format: "2000-01-01" "2000-02-01" ...
##  $ value             : num [1:161136] 1.3 0.3 0.5 0.6 0.9 ...

4.1 Creating SO4 Exposure Variables

Now I’m creating new variables where I am matching up PM averages per year to years of major events for patients (year of diagnosis, year of death/lung transplant/censoring, etc)

4.1.1 5yrs Pre-Sampling

Here I am calculating the average SO4 value in the 5yrs prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(SO4_5yrPreSamp = mean(value[ymd(SO4_date)>=(ymd(sample_date) - years(5)) & 
                                        ymd(SO4_date)<=ymd(sample_date)]))

4.1.2 1yr Pre-Sampling

Here I am calculating the average SO4 value in the 1yr prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(SO4_1yrPreSamp = mean(value[ymd(SO4_date)>=(ymd(sample_date) - years(1)) & 
                                        ymd(SO4_date)<=ymd(sample_date)]))

4.1.3 6mo Pre-Sampling

Here I am calculating the average SO4 value in the 6mo prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(SO4_6moPreSamp = mean(value[ymd(SO4_date)>=(ymd(sample_date) - months(6)) & 
                                        ymd(SO4_date)<=ymd(sample_date)]))

4.1.4 3mo Pre-Sampling

Here I am calculating the average SO4 value in the 3mo prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(SO4_3moPreSamp = mean(value[ymd(SO4_date)>=(ymd(sample_date) - months(3)) & 
                                        ymd(SO4_date)<=ymd(sample_date)]))

4.1.5 1mo Pre-Sampling

Here I am calculating the average SO4 value in the 1mo prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(SO4_1moPreSamp = mean(value[ymd(SO4_date)>=(ymd(sample_date) - months(2)) & 
                                        ymd(SO4_date)<=ymd(sample_date)]))

Of note, the 1mo pre-sampling with this method actually provides the average PM2.5 value for the month of sampling and the 1mo prior to sampling because the sampling could have occurred at the beginning or end of a month.

4.1.6 Remove Dataframes and variables not in use

rm(list=c("dnamx", "PM", "SO4"))

4.1.7 Getting Rid of Duplicated IDs

Now that we have calculated the patient-specific PM exposures, I can get rid of all rows other than the first row for each patient.

dnam <-  dnam %>% 
  distinct_at(vars(ID), .keep_all=T)

This takes us down to 733 observations.

4.1.8 Getting rid of unnecessary columns

dnam <- dnam %>% dplyr::select(!c(nrow, dist, lon, lat, SO4_date, value))

4.2 NO3 add Original NO3 Matching Files with All Months

outfile3 <- here("PM25_Data/PFF_fILD_2000_2017_NO3_2021_11_05.xlsx")
NO3 <- read_excel(outfile3)
dnam <- inner_join(NO3, dnam, by="ID")

Reorder so “ID” is the first column

dnam <- dnam %>% dplyr::select(ID, everything(.))

4.2.1 Pivoting to Long Format

First we need to convert the PM dataframe into the long rather than the wide format, which will allow us to use it more easily in R’s tidyverse as this is “tidy” formatting.

dnam <- dnam %>% 
  pivot_longer(cols=c(6:221), names_to="NO3_date", names_prefix="NIT_", names_repair = "minimal")

4.2.2 Convert date PM_date to same format as above

dnamx <- dnam 
dnamx$NO3_date <- gsub("jan", "01-01-20", dnamx$NO3_date)
dnamx$NO3_date <- gsub("feb", "01-02-20", dnamx$NO3_date)
dnamx$NO3_date <- gsub("mar", "01-03-20", dnamx$NO3_date)
dnamx$NO3_date <- gsub("apr", "01-04-20", dnamx$NO3_date)
dnamx$NO3_date <- gsub("may", "01-05-20", dnamx$NO3_date)
dnamx$NO3_date <- gsub("jun", "01-06-20", dnamx$NO3_date)
dnamx$NO3_date <- gsub("jul", "01-07-20", dnamx$NO3_date)
dnamx$NO3_date <- gsub("aug", "01-08-20", dnamx$NO3_date)
dnamx$NO3_date <- gsub("sep", "01-09-20", dnamx$NO3_date)
dnamx$NO3_date <- gsub("oct", "01-10-20", dnamx$NO3_date)
dnamx$NO3_date <- gsub("nov", "01-11-20", dnamx$NO3_date)
dnamx$NO3_date <- gsub("dec", "01-12-20", dnamx$NO3_date)

dnamx$NO3_date <- format(as.Date(dnamx$NO3_date, format="%d-%m-%Y"),"%Y-%m-%d")
dnamx$NO3_date <- as.Date(dnamx$NO3_date)
dnam <- dnamx

5 Convert Date Columns to Date Format

Next I need to convert all date columns to proper format

dnam <- dnam %>% 
  mutate_at(c("NO3_date"), as.Date)
str(dnam)
## tibble [161,136 × 68] (S3: tbl_df/tbl/data.frame)
##  $ ID                : num [1:161136] 513 513 513 513 513 513 513 513 513 513 ...
##  $ nrow              : num [1:161136] 27492333 27492333 27492333 27492333 27492333 ...
##  $ dist              : num [1:161136] 0.00707 0.00707 0.00707 0.00707 0.00707 ...
##  $ lon               : num [1:161136] -123 -123 -123 -123 -123 ...
##  $ lat               : num [1:161136] 38.4 38.4 38.4 38.4 38.4 ...
##  $ SSID              : chr [1:161136] "02R0456" "02R0456" "02R0456" "02R0456" ...
##  $ sampID            : num [1:161136] 77900524 77900524 77900524 77900524 77900524 ...
##  $ exptID            : num [1:161136] 682 682 682 682 682 682 682 682 682 682 ...
##  $ run_date          : Date[1:161136], format: "2022-04-25" "2022-04-25" ...
##  $ plate             : num [1:161136] 17 17 17 17 17 17 17 17 17 17 ...
##  $ pct_5mC           : num [1:161136] 0.105 0.105 0.105 0.105 0.105 ...
##  $ sex               : chr [1:161136] "Male" "Male" "Male" "Male" ...
##  $ race              : chr [1:161136] "W" "W" "W" "W" ...
##  $ dich_Race         : chr [1:161136] "White" "White" "White" "White" ...
##  $ ethnicity         : chr [1:161136] "N" "N" "N" "N" ...
##  $ smokeHx           : chr [1:161136] "Ever" "Ever" "Ever" "Ever" ...
##  $ age_dx            : num [1:161136] 74 74 74 74 74 ...
##  $ status            : chr [1:161136] "1" "1" "1" "1" ...
##  $ deadORtx          : num [1:161136] 1 1 1 1 1 1 1 1 1 1 ...
##  $ dx                : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
##  $ dx_group          : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
##  $ dx_date           : Date[1:161136], format: "2016-05-20" "2016-05-20" ...
##  $ consent_date      : Date[1:161136], format: "2016-05-19" "2016-05-19" ...
##  $ censor_date       : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
##  $ tx_date           : Date[1:161136], format: NA NA ...
##  $ death_date        : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
##  $ deathORtx_date    : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
##  $ DeathTxCensor_date: Date[1:161136], format: "2016-06-11" "2016-06-11" ...
##  $ sample_date       : Date[1:161136], format: "2016-05-19" "2016-05-19" ...
##  $ fvc_date          : Date[1:161136], format: "2016-03-17" "2016-03-17" ...
##  $ dlco_date         : Date[1:161136], format: "2016-03-17" "2016-03-17" ...
##  $ fvc_pct           : num [1:161136] 58.5 58.5 58.5 58.5 58.5 ...
##  $ dlco_pct          : num [1:161136] 28.8 28.8 28.8 28.8 28.8 ...
##  $ fvc_timefromdx    : num [1:161136] -0.175 -0.175 -0.175 -0.175 -0.175 ...
##  $ dlco_timefromdx   : num [1:161136] -0.175 -0.175 -0.175 -0.175 -0.175 ...
##  $ Reason_Termination: chr [1:161136] "Death" "Death" "Death" "Death" ...
##  $ Death_ILD_Related : chr [1:161136] "Yes" "Yes" "Yes" "Yes" ...
##  $ same_zip          : logi [1:161136] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ zip               : chr [1:161136] "95405" "95405" "95405" "95405" ...
##  $ zip_new           : chr [1:161136] "95405" "95405" "95405" "95405" ...
##  $ state             : chr [1:161136] "CA" "CA" "CA" "CA" ...
##  $ major_city        : chr [1:161136] "Santa Rosa" "Santa Rosa" "Santa Rosa" "Santa Rosa" ...
##  $ time_censoring    : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
##  $ time_death        : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
##  $ time_tx           : num [1:161136] NA NA NA NA NA NA NA NA NA NA ...
##  $ time_deathORtx    : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
##  $ time_DeathTxCensor: num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
##  $ PM_5yrPreCensor   : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
##  $ PM_5yrPreDx       : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
##  $ PM5yrCensor_dich  : chr [1:161136] "Low" "Low" "Low" "Low" ...
##  $ PM5yr_dich        : chr [1:161136] "Low" "Low" "Low" "Low" ...
##  $ dx_IPF            : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
##  $ ruca              : num [1:161136] 1 1 1 1 1 1 1 1 1 1 ...
##  $ metro             : chr [1:161136] "metropolitan" "metropolitan" "metropolitan" "metropolitan" ...
##  $ site              : chr [1:161136] "02R" "02R" "02R" "02R" ...
##  $ PM_date           : Date[1:161136], format: "2000-01-01" "2000-01-01" ...
##  $ PM_5yrPreSamp     : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
##  $ PM_1yrPreSamp     : num [1:161136] 6.31 6.31 6.31 6.31 6.31 ...
##  $ PM_6moPreSamp     : num [1:161136] 6.6 6.6 6.6 6.6 6.6 ...
##  $ PM_3moPreSamp     : num [1:161136] 5.63 5.63 5.63 5.63 5.63 ...
##  $ PM_1moPreSamp     : num [1:161136] 6.35 6.35 6.35 6.35 6.35 ...
##  $ SO4_5yrPreSamp    : num [1:161136] 0.673 0.673 0.673 0.673 0.673 ...
##  $ SO4_1yrPreSamp    : num [1:161136] 0.475 0.475 0.475 0.475 0.475 ...
##  $ SO4_6moPreSamp    : num [1:161136] 0.417 0.417 0.417 0.417 0.417 ...
##  $ SO4_3moPreSamp    : num [1:161136] 0.567 0.567 0.567 0.567 0.567 ...
##  $ SO4_1moPreSamp    : num [1:161136] 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 ...
##  $ NO3_date          : Date[1:161136], format: "2000-01-01" "2000-02-01" ...
##  $ value             : num [1:161136] 5 1.2 1.7 0.7 0.3 ...

5.1 Creating NO3 Exposure Variables

Now I’m creating new variables where I am matching up PM averages per year to years of major events for patients (year of diagnosis, year of death/lung transplant/censoring, etc)

5.1.1 5yrs Pre-Sampling

Here I am calculating the average NO3 value in the 5yrs prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(NO3_5yrPreSamp = mean(value[ymd(NO3_date)>=(ymd(sample_date) - years(5)) & 
                                        ymd(NO3_date)<=ymd(sample_date)]))

5.1.2 1yr Pre-Sampling

Here I am calculating the average NO3 value in the 1yr prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(NO3_1yrPreSamp = mean(value[ymd(NO3_date)>=(ymd(sample_date) - years(1)) & 
                                        ymd(NO3_date)<=ymd(sample_date)]))

5.1.3 6mo Pre-Sampling

Here I am calculating the average NO3 value in the 6mo prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(NO3_6moPreSamp = mean(value[ymd(NO3_date)>=(ymd(sample_date) - months(6)) & 
                                        ymd(NO3_date)<=ymd(sample_date)]))

5.1.4 3mo Pre-Sampling

Here I am calculating the average NO3 value in the 3mo prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(NO3_3moPreSamp = mean(value[ymd(NO3_date)>=(ymd(sample_date) - months(3)) & 
                                        ymd(NO3_date)<=ymd(sample_date)]))

5.1.5 1mo Pre-Sampling

Here I am calculating the average NO3 value in the 1mo prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(NO3_1moPreSamp = mean(value[ymd(NO3_date)>=(ymd(sample_date) - months(2)) & 
                                        ymd(NO3_date)<=ymd(sample_date)]))

Of note, the 1mo pre-sampling with this method actually provides the average PM2.5 value for the month of sampling and the 1mo prior to sampling because the sampling could have occurred at the beginning or end of a month.

5.1.6 Remove Dataframes and variables not in use

rm(list=c("dnamx", "NO3"))
dnam <- dnam %>% dplyr::select(!c(nrow, dist, lon, lat, value, NO3_date))

5.1.7 Getting Rid of Duplicated IDs

Now that we have calculated the patient-specific PM exposures, I can get rid of all rows other than the first row for each patient.

dnam <-  dnam %>% 
  distinct_at(vars(ID), .keep_all=T)

This takes us down to 733 observations.

5.2 NH4 add Original NH4 Matching Files with All Months

outfile3 <- here("PM25_Data/PFF_fILD_2000_2017_NH4_2021_11_05.xlsx")
NH4 <- read_excel(outfile3)
dnam <- inner_join(NH4, dnam, by="ID")

Reorder so “ID” is the first column

dnam <- dnam %>% dplyr::select(ID, everything(.))

5.2.1 Pivoting to Long Format

First we need to convert the PM dataframe into the long rather than the wide format, which will allow us to use it more easily in R’s tidyverse as this is “tidy” formatting.

dnam <- dnam %>% 
  pivot_longer(cols=c(6:221), names_to="NH4_date", names_prefix="NH4_", names_repair = "minimal")

5.2.2 Convert date PM_date to same format as above

dnamx <- dnam 
dnamx$NH4_date <- gsub("jan", "01-01-20", dnamx$NH4_date)
dnamx$NH4_date <- gsub("feb", "01-02-20", dnamx$NH4_date)
dnamx$NH4_date <- gsub("mar", "01-03-20", dnamx$NH4_date)
dnamx$NH4_date <- gsub("apr", "01-04-20", dnamx$NH4_date)
dnamx$NH4_date <- gsub("may", "01-05-20", dnamx$NH4_date)
dnamx$NH4_date <- gsub("jun", "01-06-20", dnamx$NH4_date)
dnamx$NH4_date <- gsub("jul", "01-07-20", dnamx$NH4_date)
dnamx$NH4_date <- gsub("aug", "01-08-20", dnamx$NH4_date)
dnamx$NH4_date <- gsub("sep", "01-09-20", dnamx$NH4_date)
dnamx$NH4_date <- gsub("oct", "01-10-20", dnamx$NH4_date)
dnamx$NH4_date <- gsub("nov", "01-11-20", dnamx$NH4_date)
dnamx$NH4_date <- gsub("dec", "01-12-20", dnamx$NH4_date)

dnamx$NH4_date <- format(as.Date(dnamx$NH4_date, format="%d-%m-%Y"),"%Y-%m-%d")
dnamx$NH4_date <- as.Date(dnamx$NH4_date)
dnam <- dnamx

6 Convert Date Columns to Date Format

Next I need to convert all date columns to proper format

dnam <- dnam %>% 
  mutate_at(c("NH4_date"), as.Date)
str(dnam)
## tibble [161,136 × 73] (S3: tbl_df/tbl/data.frame)
##  $ ID                : num [1:161136] 513 513 513 513 513 513 513 513 513 513 ...
##  $ nrow              : num [1:161136] 27492333 27492333 27492333 27492333 27492333 ...
##  $ dist              : num [1:161136] 0.00707 0.00707 0.00707 0.00707 0.00707 ...
##  $ lon               : num [1:161136] -123 -123 -123 -123 -123 ...
##  $ lat               : num [1:161136] 38.4 38.4 38.4 38.4 38.4 ...
##  $ SSID              : chr [1:161136] "02R0456" "02R0456" "02R0456" "02R0456" ...
##  $ sampID            : num [1:161136] 77900524 77900524 77900524 77900524 77900524 ...
##  $ exptID            : num [1:161136] 682 682 682 682 682 682 682 682 682 682 ...
##  $ run_date          : Date[1:161136], format: "2022-04-25" "2022-04-25" ...
##  $ plate             : num [1:161136] 17 17 17 17 17 17 17 17 17 17 ...
##  $ pct_5mC           : num [1:161136] 0.105 0.105 0.105 0.105 0.105 ...
##  $ sex               : chr [1:161136] "Male" "Male" "Male" "Male" ...
##  $ race              : chr [1:161136] "W" "W" "W" "W" ...
##  $ dich_Race         : chr [1:161136] "White" "White" "White" "White" ...
##  $ ethnicity         : chr [1:161136] "N" "N" "N" "N" ...
##  $ smokeHx           : chr [1:161136] "Ever" "Ever" "Ever" "Ever" ...
##  $ age_dx            : num [1:161136] 74 74 74 74 74 ...
##  $ status            : chr [1:161136] "1" "1" "1" "1" ...
##  $ deadORtx          : num [1:161136] 1 1 1 1 1 1 1 1 1 1 ...
##  $ dx                : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
##  $ dx_group          : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
##  $ dx_date           : Date[1:161136], format: "2016-05-20" "2016-05-20" ...
##  $ consent_date      : Date[1:161136], format: "2016-05-19" "2016-05-19" ...
##  $ censor_date       : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
##  $ tx_date           : Date[1:161136], format: NA NA ...
##  $ death_date        : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
##  $ deathORtx_date    : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
##  $ DeathTxCensor_date: Date[1:161136], format: "2016-06-11" "2016-06-11" ...
##  $ sample_date       : Date[1:161136], format: "2016-05-19" "2016-05-19" ...
##  $ fvc_date          : Date[1:161136], format: "2016-03-17" "2016-03-17" ...
##  $ dlco_date         : Date[1:161136], format: "2016-03-17" "2016-03-17" ...
##  $ fvc_pct           : num [1:161136] 58.5 58.5 58.5 58.5 58.5 ...
##  $ dlco_pct          : num [1:161136] 28.8 28.8 28.8 28.8 28.8 ...
##  $ fvc_timefromdx    : num [1:161136] -0.175 -0.175 -0.175 -0.175 -0.175 ...
##  $ dlco_timefromdx   : num [1:161136] -0.175 -0.175 -0.175 -0.175 -0.175 ...
##  $ Reason_Termination: chr [1:161136] "Death" "Death" "Death" "Death" ...
##  $ Death_ILD_Related : chr [1:161136] "Yes" "Yes" "Yes" "Yes" ...
##  $ same_zip          : logi [1:161136] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ zip               : chr [1:161136] "95405" "95405" "95405" "95405" ...
##  $ zip_new           : chr [1:161136] "95405" "95405" "95405" "95405" ...
##  $ state             : chr [1:161136] "CA" "CA" "CA" "CA" ...
##  $ major_city        : chr [1:161136] "Santa Rosa" "Santa Rosa" "Santa Rosa" "Santa Rosa" ...
##  $ time_censoring    : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
##  $ time_death        : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
##  $ time_tx           : num [1:161136] NA NA NA NA NA NA NA NA NA NA ...
##  $ time_deathORtx    : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
##  $ time_DeathTxCensor: num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
##  $ PM_5yrPreCensor   : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
##  $ PM_5yrPreDx       : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
##  $ PM5yrCensor_dich  : chr [1:161136] "Low" "Low" "Low" "Low" ...
##  $ PM5yr_dich        : chr [1:161136] "Low" "Low" "Low" "Low" ...
##  $ dx_IPF            : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
##  $ ruca              : num [1:161136] 1 1 1 1 1 1 1 1 1 1 ...
##  $ metro             : chr [1:161136] "metropolitan" "metropolitan" "metropolitan" "metropolitan" ...
##  $ site              : chr [1:161136] "02R" "02R" "02R" "02R" ...
##  $ PM_date           : Date[1:161136], format: "2000-01-01" "2000-01-01" ...
##  $ PM_5yrPreSamp     : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
##  $ PM_1yrPreSamp     : num [1:161136] 6.31 6.31 6.31 6.31 6.31 ...
##  $ PM_6moPreSamp     : num [1:161136] 6.6 6.6 6.6 6.6 6.6 ...
##  $ PM_3moPreSamp     : num [1:161136] 5.63 5.63 5.63 5.63 5.63 ...
##  $ PM_1moPreSamp     : num [1:161136] 6.35 6.35 6.35 6.35 6.35 ...
##  $ SO4_5yrPreSamp    : num [1:161136] 0.673 0.673 0.673 0.673 0.673 ...
##  $ SO4_1yrPreSamp    : num [1:161136] 0.475 0.475 0.475 0.475 0.475 ...
##  $ SO4_6moPreSamp    : num [1:161136] 0.417 0.417 0.417 0.417 0.417 ...
##  $ SO4_3moPreSamp    : num [1:161136] 0.567 0.567 0.567 0.567 0.567 ...
##  $ SO4_1moPreSamp    : num [1:161136] 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 ...
##  $ NO3_5yrPreSamp    : num [1:161136] 0.923 0.923 0.923 0.923 0.923 ...
##  $ NO3_1yrPreSamp    : num [1:161136] 0.8 0.8 0.8 0.8 0.8 ...
##  $ NO3_6moPreSamp    : num [1:161136] 0.883 0.883 0.883 0.883 0.883 ...
##  $ NO3_3moPreSamp    : num [1:161136] 0.533 0.533 0.533 0.533 0.533 ...
##  $ NO3_1moPreSamp    : num [1:161136] 0.6 0.6 0.6 0.6 0.6 ...
##  $ NH4_date          : Date[1:161136], format: "2000-01-01" "2000-02-01" ...
##  $ value             : num [1:161136] 0.4 0 0.1 0.1 0.1 ...

6.1 Creating NH4 Exposure Variables

Now I’m creating new variables where I am matching up PM averages per year to years of major events for patients (year of diagnosis, year of death/lung transplant/censoring, etc)

6.1.1 5yrs Pre-Sampling

Here I am calculating the average NH4 value in the 5yrs prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(NH4_5yrPreSamp = mean(value[ymd(NH4_date)>=(ymd(sample_date) - years(5)) & 
                                        ymd(NH4_date)<=ymd(sample_date)]))

6.1.2 1yr Pre-Sampling

Here I am calculating the average NH4 value in the 1yr prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(NH4_1yrPreSamp = mean(value[ymd(NH4_date)>=(ymd(sample_date) - years(1)) & 
                                        ymd(NH4_date)<=ymd(sample_date)]))

6.1.3 6mo Pre-Sampling

Here I am calculating the average NH4 value in the 6mo prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(NH4_6moPreSamp = mean(value[ymd(NH4_date)>=(ymd(sample_date) - months(6)) & 
                                        ymd(NH4_date)<=ymd(sample_date)]))

6.1.4 3mo Pre-Sampling

Here I am calculating the average NH4 value in the 3mo prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(NH4_3moPreSamp = mean(value[ymd(NH4_date)>=(ymd(sample_date) - months(3)) & 
                                        ymd(NH4_date)<=ymd(sample_date)]))

6.1.5 1mo Pre-Sampling

Here I am calculating the average NH4 value in the 1mo prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(NH4_1moPreSamp = mean(value[ymd(NH4_date)>=(ymd(sample_date) - months(2)) & 
                                        ymd(NH4_date)<=ymd(sample_date)]))

Of note, the 1mo pre-sampling with this method actually provides the average PM2.5 value for the month of sampling and the 1mo prior to sampling because the sampling could have occurred at the beginning or end of a month.

6.1.6 Remove Dataframes and variables not in use

rm(list=c("dnamx", "NH4"))
dnam <- dnam %>% dplyr::select(!c(nrow, dist, lon, lat, value, NH4_date))

6.1.7 Getting Rid of Duplicated IDs

Now that we have calculated the patient-specific PM exposures, I can get rid of all rows other than the first row for each patient.

dnam <-  dnam %>% 
  distinct_at(vars(ID), .keep_all=T)

This takes us down to 733 observations.

6.2 BC add Original BC Matching Files with All Months

outfile3 <- here("PM25_Data/PFF_fILD_2000_2017_BC_2021_11_05.xlsx")
BC <- read_excel(outfile3)
dnam <- inner_join(BC, dnam, by="ID")

Reorder so “ID” is the first column

dnam <- dnam %>% dplyr::select(ID, everything(.))

6.2.1 Pivoting to Long Format

First we need to convert the PM dataframe into the long rather than the wide format, which will allow us to use it more easily in R’s tidyverse as this is “tidy” formatting.

dnam <- dnam %>% 
  pivot_longer(cols=c(6:221), names_to="BC_date", names_prefix="BC_", names_repair = "minimal")

6.2.2 Convert date PM_date to same format as above

dnamx <- dnam 
dnamx$BC_date <- gsub("jan", "01-01-20", dnamx$BC_date)
dnamx$BC_date <- gsub("feb", "01-02-20", dnamx$BC_date)
dnamx$BC_date <- gsub("mar", "01-03-20", dnamx$BC_date)
dnamx$BC_date <- gsub("apr", "01-04-20", dnamx$BC_date)
dnamx$BC_date <- gsub("may", "01-05-20", dnamx$BC_date)
dnamx$BC_date <- gsub("jun", "01-06-20", dnamx$BC_date)
dnamx$BC_date <- gsub("jul", "01-07-20", dnamx$BC_date)
dnamx$BC_date <- gsub("aug", "01-08-20", dnamx$BC_date)
dnamx$BC_date <- gsub("sep", "01-09-20", dnamx$BC_date)
dnamx$BC_date <- gsub("oct", "01-10-20", dnamx$BC_date)
dnamx$BC_date <- gsub("nov", "01-11-20", dnamx$BC_date)
dnamx$BC_date <- gsub("dec", "01-12-20", dnamx$BC_date)

dnamx$BC_date <- format(as.Date(dnamx$BC_date, format="%d-%m-%Y"),"%Y-%m-%d")
dnamx$BC_date <- as.Date(dnamx$BC_date)
dnam <- dnamx

7 Convert Date Columns to Date Format

Next I need to convert all date columns to proper format

dnam <- dnam %>% 
  mutate_at(c("BC_date"), as.Date)
str(dnam)
## tibble [161,136 × 78] (S3: tbl_df/tbl/data.frame)
##  $ ID                : num [1:161136] 513 513 513 513 513 513 513 513 513 513 ...
##  $ nrow              : num [1:161136] 27492333 27492333 27492333 27492333 27492333 ...
##  $ dist              : num [1:161136] 0.00707 0.00707 0.00707 0.00707 0.00707 ...
##  $ lon               : num [1:161136] -123 -123 -123 -123 -123 ...
##  $ lat               : num [1:161136] 38.4 38.4 38.4 38.4 38.4 ...
##  $ SSID              : chr [1:161136] "02R0456" "02R0456" "02R0456" "02R0456" ...
##  $ sampID            : num [1:161136] 77900524 77900524 77900524 77900524 77900524 ...
##  $ exptID            : num [1:161136] 682 682 682 682 682 682 682 682 682 682 ...
##  $ run_date          : Date[1:161136], format: "2022-04-25" "2022-04-25" ...
##  $ plate             : num [1:161136] 17 17 17 17 17 17 17 17 17 17 ...
##  $ pct_5mC           : num [1:161136] 0.105 0.105 0.105 0.105 0.105 ...
##  $ sex               : chr [1:161136] "Male" "Male" "Male" "Male" ...
##  $ race              : chr [1:161136] "W" "W" "W" "W" ...
##  $ dich_Race         : chr [1:161136] "White" "White" "White" "White" ...
##  $ ethnicity         : chr [1:161136] "N" "N" "N" "N" ...
##  $ smokeHx           : chr [1:161136] "Ever" "Ever" "Ever" "Ever" ...
##  $ age_dx            : num [1:161136] 74 74 74 74 74 ...
##  $ status            : chr [1:161136] "1" "1" "1" "1" ...
##  $ deadORtx          : num [1:161136] 1 1 1 1 1 1 1 1 1 1 ...
##  $ dx                : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
##  $ dx_group          : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
##  $ dx_date           : Date[1:161136], format: "2016-05-20" "2016-05-20" ...
##  $ consent_date      : Date[1:161136], format: "2016-05-19" "2016-05-19" ...
##  $ censor_date       : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
##  $ tx_date           : Date[1:161136], format: NA NA ...
##  $ death_date        : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
##  $ deathORtx_date    : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
##  $ DeathTxCensor_date: Date[1:161136], format: "2016-06-11" "2016-06-11" ...
##  $ sample_date       : Date[1:161136], format: "2016-05-19" "2016-05-19" ...
##  $ fvc_date          : Date[1:161136], format: "2016-03-17" "2016-03-17" ...
##  $ dlco_date         : Date[1:161136], format: "2016-03-17" "2016-03-17" ...
##  $ fvc_pct           : num [1:161136] 58.5 58.5 58.5 58.5 58.5 ...
##  $ dlco_pct          : num [1:161136] 28.8 28.8 28.8 28.8 28.8 ...
##  $ fvc_timefromdx    : num [1:161136] -0.175 -0.175 -0.175 -0.175 -0.175 ...
##  $ dlco_timefromdx   : num [1:161136] -0.175 -0.175 -0.175 -0.175 -0.175 ...
##  $ Reason_Termination: chr [1:161136] "Death" "Death" "Death" "Death" ...
##  $ Death_ILD_Related : chr [1:161136] "Yes" "Yes" "Yes" "Yes" ...
##  $ same_zip          : logi [1:161136] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ zip               : chr [1:161136] "95405" "95405" "95405" "95405" ...
##  $ zip_new           : chr [1:161136] "95405" "95405" "95405" "95405" ...
##  $ state             : chr [1:161136] "CA" "CA" "CA" "CA" ...
##  $ major_city        : chr [1:161136] "Santa Rosa" "Santa Rosa" "Santa Rosa" "Santa Rosa" ...
##  $ time_censoring    : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
##  $ time_death        : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
##  $ time_tx           : num [1:161136] NA NA NA NA NA NA NA NA NA NA ...
##  $ time_deathORtx    : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
##  $ time_DeathTxCensor: num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
##  $ PM_5yrPreCensor   : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
##  $ PM_5yrPreDx       : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
##  $ PM5yrCensor_dich  : chr [1:161136] "Low" "Low" "Low" "Low" ...
##  $ PM5yr_dich        : chr [1:161136] "Low" "Low" "Low" "Low" ...
##  $ dx_IPF            : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
##  $ ruca              : num [1:161136] 1 1 1 1 1 1 1 1 1 1 ...
##  $ metro             : chr [1:161136] "metropolitan" "metropolitan" "metropolitan" "metropolitan" ...
##  $ site              : chr [1:161136] "02R" "02R" "02R" "02R" ...
##  $ PM_date           : Date[1:161136], format: "2000-01-01" "2000-01-01" ...
##  $ PM_5yrPreSamp     : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
##  $ PM_1yrPreSamp     : num [1:161136] 6.31 6.31 6.31 6.31 6.31 ...
##  $ PM_6moPreSamp     : num [1:161136] 6.6 6.6 6.6 6.6 6.6 ...
##  $ PM_3moPreSamp     : num [1:161136] 5.63 5.63 5.63 5.63 5.63 ...
##  $ PM_1moPreSamp     : num [1:161136] 6.35 6.35 6.35 6.35 6.35 ...
##  $ SO4_5yrPreSamp    : num [1:161136] 0.673 0.673 0.673 0.673 0.673 ...
##  $ SO4_1yrPreSamp    : num [1:161136] 0.475 0.475 0.475 0.475 0.475 ...
##  $ SO4_6moPreSamp    : num [1:161136] 0.417 0.417 0.417 0.417 0.417 ...
##  $ SO4_3moPreSamp    : num [1:161136] 0.567 0.567 0.567 0.567 0.567 ...
##  $ SO4_1moPreSamp    : num [1:161136] 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 ...
##  $ NO3_5yrPreSamp    : num [1:161136] 0.923 0.923 0.923 0.923 0.923 ...
##  $ NO3_1yrPreSamp    : num [1:161136] 0.8 0.8 0.8 0.8 0.8 ...
##  $ NO3_6moPreSamp    : num [1:161136] 0.883 0.883 0.883 0.883 0.883 ...
##  $ NO3_3moPreSamp    : num [1:161136] 0.533 0.533 0.533 0.533 0.533 ...
##  $ NO3_1moPreSamp    : num [1:161136] 0.6 0.6 0.6 0.6 0.6 ...
##  $ NH4_5yrPreSamp    : num [1:161136] 0.143 0.143 0.143 0.143 0.143 ...
##  $ NH4_1yrPreSamp    : num [1:161136] 0.0583 0.0583 0.0583 0.0583 0.0583 ...
##  $ NH4_6moPreSamp    : num [1:161136] 0.05 0.05 0.05 0.05 0.05 ...
##  $ NH4_3moPreSamp    : num [1:161136] 0.0333 0.0333 0.0333 0.0333 0.0333 ...
##  $ NH4_1moPreSamp    : num [1:161136] 0.05 0.05 0.05 0.05 0.05 ...
##  $ BC_date           : Date[1:161136], format: "2000-01-01" "2000-02-01" ...
##  $ value             : num [1:161136] 1.4 1 0.7 1.2 0.9 ...

7.1 Creating BC Exposure Variables

Now I’m creating new variables where I am matching up PM averages per year to years of major events for patients (year of diagnosis, year of death/lung transplant/censoring, etc)

7.1.1 5yrs Pre-Sampling

Here I am calculating the average BC value in the 5yrs prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(BC_5yrPreSamp = mean(value[ymd(BC_date)>=(ymd(sample_date) - years(5)) & 
                                        ymd(BC_date)<=ymd(sample_date)]))

7.1.2 1yr Pre-Sampling

Here I am calculating the average BC value in the 1yr prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(BC_1yrPreSamp = mean(value[ymd(BC_date)>=(ymd(sample_date) - years(1)) & 
                                        ymd(BC_date)<=ymd(sample_date)]))

7.1.3 6mo Pre-Sampling

Here I am calculating the average BC value in the 6mo prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(BC_6moPreSamp = mean(value[ymd(BC_date)>=(ymd(sample_date) - months(6)) & 
                                        ymd(BC_date)<=ymd(sample_date)]))

7.1.4 3mo Pre-Sampling

Here I am calculating the average BC value in the 3mo prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(BC_3moPreSamp = mean(value[ymd(BC_date)>=(ymd(sample_date) - months(3)) & 
                                        ymd(BC_date)<=ymd(sample_date)]))

7.1.5 1mo Pre-Sampling

Here I am calculating the average BC value in the 1mo prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(BC_1moPreSamp = mean(value[ymd(BC_date)>=(ymd(sample_date) - months(2)) & 
                                        ymd(BC_date)<=ymd(sample_date)]))

Of note, the 1mo pre-sampling with this method actually provides the average PM2.5 value for the month of sampling and the 1mo prior to sampling because the sampling could have occurred at the beginning or end of a month.

7.1.6 Remove Dataframes and variables not in use

rm(list=c("dnamx", "BC"))
dnam <- dnam %>% dplyr::select(!c(nrow, dist, lon, lat, value, BC_date))

7.1.7 Getting Rid of Duplicated IDs

Now that we have calculated the patient-specific PM exposures, I can get rid of all rows other than the first row for each patient.

dnam <-  dnam %>% 
  distinct_at(vars(ID), .keep_all=T)

This takes us down to 733 observations.

7.2 OM add Original OM Matching Files with All Months

outfile3 <- here("PM25_Data/PFF_fILD_2000_2017_OM_2021_11_05.xlsx")
OM <- read_excel(outfile3)
dnam <- inner_join(OM, dnam, by="ID")

Reorder so “ID” is the first column

dnam <- dnam %>% dplyr::select(ID, everything(.))

7.2.1 Pivoting to Long Format

First we need to convert the PM dataframe into the long rather than the wide format, which will allow us to use it more easily in R’s tidyverse as this is “tidy” formatting.

dnam <- dnam %>% 
  pivot_longer(cols=c(6:221), names_to="OM_date", names_prefix="OM_", names_repair = "minimal")

7.2.2 Convert date PM_date to same format as above

dnamx <- dnam 
dnamx$OM_date <- gsub("jan", "01-01-20", dnamx$OM_date)
dnamx$OM_date <- gsub("feb", "01-02-20", dnamx$OM_date)
dnamx$OM_date <- gsub("mar", "01-03-20", dnamx$OM_date)
dnamx$OM_date <- gsub("apr", "01-04-20", dnamx$OM_date)
dnamx$OM_date <- gsub("may", "01-05-20", dnamx$OM_date)
dnamx$OM_date <- gsub("jun", "01-06-20", dnamx$OM_date)
dnamx$OM_date <- gsub("jul", "01-07-20", dnamx$OM_date)
dnamx$OM_date <- gsub("aug", "01-08-20", dnamx$OM_date)
dnamx$OM_date <- gsub("sep", "01-09-20", dnamx$OM_date)
dnamx$OM_date <- gsub("oct", "01-10-20", dnamx$OM_date)
dnamx$OM_date <- gsub("nov", "01-11-20", dnamx$OM_date)
dnamx$OM_date <- gsub("dec", "01-12-20", dnamx$OM_date)

dnamx$OM_date <- format(as.Date(dnamx$OM_date, format="%d-%m-%Y"),"%Y-%m-%d")
dnamx$OM_date <- as.Date(dnamx$OM_date)
dnam <- dnamx

8 Convert Date Columns to Date Format

Next I need to convert all date columns to proper format

dnam <- dnam %>% 
  mutate_at(c("OM_date"), as.Date)
str(dnam)
## tibble [161,136 × 83] (S3: tbl_df/tbl/data.frame)
##  $ ID                : num [1:161136] 513 513 513 513 513 513 513 513 513 513 ...
##  $ nrow              : num [1:161136] 27492333 27492333 27492333 27492333 27492333 ...
##  $ dist              : num [1:161136] 0.00707 0.00707 0.00707 0.00707 0.00707 ...
##  $ lon               : num [1:161136] -123 -123 -123 -123 -123 ...
##  $ lat               : num [1:161136] 38.4 38.4 38.4 38.4 38.4 ...
##  $ SSID              : chr [1:161136] "02R0456" "02R0456" "02R0456" "02R0456" ...
##  $ sampID            : num [1:161136] 77900524 77900524 77900524 77900524 77900524 ...
##  $ exptID            : num [1:161136] 682 682 682 682 682 682 682 682 682 682 ...
##  $ run_date          : Date[1:161136], format: "2022-04-25" "2022-04-25" ...
##  $ plate             : num [1:161136] 17 17 17 17 17 17 17 17 17 17 ...
##  $ pct_5mC           : num [1:161136] 0.105 0.105 0.105 0.105 0.105 ...
##  $ sex               : chr [1:161136] "Male" "Male" "Male" "Male" ...
##  $ race              : chr [1:161136] "W" "W" "W" "W" ...
##  $ dich_Race         : chr [1:161136] "White" "White" "White" "White" ...
##  $ ethnicity         : chr [1:161136] "N" "N" "N" "N" ...
##  $ smokeHx           : chr [1:161136] "Ever" "Ever" "Ever" "Ever" ...
##  $ age_dx            : num [1:161136] 74 74 74 74 74 ...
##  $ status            : chr [1:161136] "1" "1" "1" "1" ...
##  $ deadORtx          : num [1:161136] 1 1 1 1 1 1 1 1 1 1 ...
##  $ dx                : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
##  $ dx_group          : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
##  $ dx_date           : Date[1:161136], format: "2016-05-20" "2016-05-20" ...
##  $ consent_date      : Date[1:161136], format: "2016-05-19" "2016-05-19" ...
##  $ censor_date       : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
##  $ tx_date           : Date[1:161136], format: NA NA ...
##  $ death_date        : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
##  $ deathORtx_date    : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
##  $ DeathTxCensor_date: Date[1:161136], format: "2016-06-11" "2016-06-11" ...
##  $ sample_date       : Date[1:161136], format: "2016-05-19" "2016-05-19" ...
##  $ fvc_date          : Date[1:161136], format: "2016-03-17" "2016-03-17" ...
##  $ dlco_date         : Date[1:161136], format: "2016-03-17" "2016-03-17" ...
##  $ fvc_pct           : num [1:161136] 58.5 58.5 58.5 58.5 58.5 ...
##  $ dlco_pct          : num [1:161136] 28.8 28.8 28.8 28.8 28.8 ...
##  $ fvc_timefromdx    : num [1:161136] -0.175 -0.175 -0.175 -0.175 -0.175 ...
##  $ dlco_timefromdx   : num [1:161136] -0.175 -0.175 -0.175 -0.175 -0.175 ...
##  $ Reason_Termination: chr [1:161136] "Death" "Death" "Death" "Death" ...
##  $ Death_ILD_Related : chr [1:161136] "Yes" "Yes" "Yes" "Yes" ...
##  $ same_zip          : logi [1:161136] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ zip               : chr [1:161136] "95405" "95405" "95405" "95405" ...
##  $ zip_new           : chr [1:161136] "95405" "95405" "95405" "95405" ...
##  $ state             : chr [1:161136] "CA" "CA" "CA" "CA" ...
##  $ major_city        : chr [1:161136] "Santa Rosa" "Santa Rosa" "Santa Rosa" "Santa Rosa" ...
##  $ time_censoring    : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
##  $ time_death        : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
##  $ time_tx           : num [1:161136] NA NA NA NA NA NA NA NA NA NA ...
##  $ time_deathORtx    : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
##  $ time_DeathTxCensor: num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
##  $ PM_5yrPreCensor   : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
##  $ PM_5yrPreDx       : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
##  $ PM5yrCensor_dich  : chr [1:161136] "Low" "Low" "Low" "Low" ...
##  $ PM5yr_dich        : chr [1:161136] "Low" "Low" "Low" "Low" ...
##  $ dx_IPF            : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
##  $ ruca              : num [1:161136] 1 1 1 1 1 1 1 1 1 1 ...
##  $ metro             : chr [1:161136] "metropolitan" "metropolitan" "metropolitan" "metropolitan" ...
##  $ site              : chr [1:161136] "02R" "02R" "02R" "02R" ...
##  $ PM_date           : Date[1:161136], format: "2000-01-01" "2000-01-01" ...
##  $ PM_5yrPreSamp     : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
##  $ PM_1yrPreSamp     : num [1:161136] 6.31 6.31 6.31 6.31 6.31 ...
##  $ PM_6moPreSamp     : num [1:161136] 6.6 6.6 6.6 6.6 6.6 ...
##  $ PM_3moPreSamp     : num [1:161136] 5.63 5.63 5.63 5.63 5.63 ...
##  $ PM_1moPreSamp     : num [1:161136] 6.35 6.35 6.35 6.35 6.35 ...
##  $ SO4_5yrPreSamp    : num [1:161136] 0.673 0.673 0.673 0.673 0.673 ...
##  $ SO4_1yrPreSamp    : num [1:161136] 0.475 0.475 0.475 0.475 0.475 ...
##  $ SO4_6moPreSamp    : num [1:161136] 0.417 0.417 0.417 0.417 0.417 ...
##  $ SO4_3moPreSamp    : num [1:161136] 0.567 0.567 0.567 0.567 0.567 ...
##  $ SO4_1moPreSamp    : num [1:161136] 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 ...
##  $ NO3_5yrPreSamp    : num [1:161136] 0.923 0.923 0.923 0.923 0.923 ...
##  $ NO3_1yrPreSamp    : num [1:161136] 0.8 0.8 0.8 0.8 0.8 ...
##  $ NO3_6moPreSamp    : num [1:161136] 0.883 0.883 0.883 0.883 0.883 ...
##  $ NO3_3moPreSamp    : num [1:161136] 0.533 0.533 0.533 0.533 0.533 ...
##  $ NO3_1moPreSamp    : num [1:161136] 0.6 0.6 0.6 0.6 0.6 ...
##  $ NH4_5yrPreSamp    : num [1:161136] 0.143 0.143 0.143 0.143 0.143 ...
##  $ NH4_1yrPreSamp    : num [1:161136] 0.0583 0.0583 0.0583 0.0583 0.0583 ...
##  $ NH4_6moPreSamp    : num [1:161136] 0.05 0.05 0.05 0.05 0.05 ...
##  $ NH4_3moPreSamp    : num [1:161136] 0.0333 0.0333 0.0333 0.0333 0.0333 ...
##  $ NH4_1moPreSamp    : num [1:161136] 0.05 0.05 0.05 0.05 0.05 ...
##  $ BC_5yrPreSamp     : num [1:161136] 0.738 0.738 0.738 0.738 0.738 ...
##  $ BC_1yrPreSamp     : num [1:161136] 0.575 0.575 0.575 0.575 0.575 ...
##  $ BC_6moPreSamp     : num [1:161136] 0.667 0.667 0.667 0.667 0.667 ...
##  $ BC_3moPreSamp     : num [1:161136] 0.367 0.367 0.367 0.367 0.367 ...
##  $ BC_1moPreSamp     : num [1:161136] 0.35 0.35 0.35 0.35 0.35 ...
##  $ OM_date           : Date[1:161136], format: "2000-01-01" "2000-02-01" ...
##  $ value             : num [1:161136] 10.9 3.2 11.3 9.7 5.5 ...

8.1 Creating OM Exposure Variables

Now I’m creating new variables where I am matching up PM averages per year to years of major events for patients (year of diagnosis, year of death/lung transplant/censoring, etc)

8.1.1 5yrs Pre-Sampling

Here I am calculating the average OM value in the 5yrs prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(OM_5yrPreSamp = mean(value[ymd(OM_date)>=(ymd(sample_date) - years(5)) & 
                                        ymd(OM_date)<=ymd(sample_date)]))

8.1.2 1yr Pre-Sampling

Here I am calculating the average OM value in the 1yr prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(OM_1yrPreSamp = mean(value[ymd(OM_date)>=(ymd(sample_date) - years(1)) & 
                                        ymd(OM_date)<=ymd(sample_date)]))

8.1.3 6mo Pre-Sampling

Here I am calculating the average OM value in the 6mo prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(OM_6moPreSamp = mean(value[ymd(OM_date)>=(ymd(sample_date) - months(6)) & 
                                        ymd(OM_date)<=ymd(sample_date)]))

8.1.4 3mo Pre-Sampling

Here I am calculating the average OM value in the 3mo prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(OM_3moPreSamp = mean(value[ymd(OM_date)>=(ymd(sample_date) - months(3)) & 
                                        ymd(OM_date)<=ymd(sample_date)]))

8.1.5 1mo Pre-Sampling

Here I am calculating the average OM value in the 1mo prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(OM_1moPreSamp = mean(value[ymd(OM_date)>=(ymd(sample_date) - months(2)) & 
                                        ymd(OM_date)<=ymd(sample_date)]))

Of note, the 1mo pre-sampling with this method actually provides the average PM2.5 value for the month of sampling and the 1mo prior to sampling because the sampling could have occurred at the beginning or end of a month.

8.1.6 Remove Dataframes and variables not in use

rm(list=c("dnamx", "OM"))
dnam <- dnam %>% dplyr::select(!c(nrow, dist, lon, lat, value, OM_date))

8.1.7 Getting Rid of Duplicated IDs

Now that we have calculated the patient-specific PM exposures, I can get rid of all rows other than the first row for each patient.

dnam <-  dnam %>% 
  distinct_at(vars(ID), .keep_all=T)

This takes us down to 733 observations.

8.2 SS add Original SS Matching Files with All Months

outfile3 <- here("PM25_Data/PFF_fILD_2000_2017_SS_2021_11_05.xlsx")
SS <- read_excel(outfile3)
dnam <- inner_join(SS, dnam, by="ID")

Reorder so “ID” is the first column

dnam <- dnam %>% dplyr::select(ID, everything(.))

8.2.1 Pivoting to Long Format

First we need to convert the PM dataframe into the long rather than the wide format, which will allow us to use it more easily in R’s tidyverse as this is “tidy” formatting.

dnam <- dnam %>% 
  pivot_longer(cols=c(6:221), names_to="SS_date", names_prefix="SS_", names_repair = "minimal")

8.2.2 Convert date PM_date to same format as above

dnamx <- dnam 
dnamx$SS_date <- gsub("jan", "01-01-20", dnamx$SS_date)
dnamx$SS_date <- gsub("feb", "01-02-20", dnamx$SS_date)
dnamx$SS_date <- gsub("mar", "01-03-20", dnamx$SS_date)
dnamx$SS_date <- gsub("apr", "01-04-20", dnamx$SS_date)
dnamx$SS_date <- gsub("may", "01-05-20", dnamx$SS_date)
dnamx$SS_date <- gsub("jun", "01-06-20", dnamx$SS_date)
dnamx$SS_date <- gsub("jul", "01-07-20", dnamx$SS_date)
dnamx$SS_date <- gsub("aug", "01-08-20", dnamx$SS_date)
dnamx$SS_date <- gsub("sep", "01-09-20", dnamx$SS_date)
dnamx$SS_date <- gsub("oct", "01-10-20", dnamx$SS_date)
dnamx$SS_date <- gsub("nov", "01-11-20", dnamx$SS_date)
dnamx$SS_date <- gsub("dec", "01-12-20", dnamx$SS_date)

dnamx$SS_date <- format(as.Date(dnamx$SS_date, format="%d-%m-%Y"),"%Y-%m-%d")
dnamx$SS_date <- as.Date(dnamx$SS_date)
dnam <- dnamx

9 Convert Date Columns to Date Format

Next I need to convert all date columns to proper format

dnam <- dnam %>% 
  mutate_at(c("SS_date"), as.Date)
str(dnam)
## tibble [161,136 × 88] (S3: tbl_df/tbl/data.frame)
##  $ ID                : num [1:161136] 513 513 513 513 513 513 513 513 513 513 ...
##  $ nrow              : num [1:161136] 27492333 27492333 27492333 27492333 27492333 ...
##  $ dist              : num [1:161136] 0.00707 0.00707 0.00707 0.00707 0.00707 ...
##  $ lon               : num [1:161136] -123 -123 -123 -123 -123 ...
##  $ lat               : num [1:161136] 38.4 38.4 38.4 38.4 38.4 ...
##  $ SSID              : chr [1:161136] "02R0456" "02R0456" "02R0456" "02R0456" ...
##  $ sampID            : num [1:161136] 77900524 77900524 77900524 77900524 77900524 ...
##  $ exptID            : num [1:161136] 682 682 682 682 682 682 682 682 682 682 ...
##  $ run_date          : Date[1:161136], format: "2022-04-25" "2022-04-25" ...
##  $ plate             : num [1:161136] 17 17 17 17 17 17 17 17 17 17 ...
##  $ pct_5mC           : num [1:161136] 0.105 0.105 0.105 0.105 0.105 ...
##  $ sex               : chr [1:161136] "Male" "Male" "Male" "Male" ...
##  $ race              : chr [1:161136] "W" "W" "W" "W" ...
##  $ dich_Race         : chr [1:161136] "White" "White" "White" "White" ...
##  $ ethnicity         : chr [1:161136] "N" "N" "N" "N" ...
##  $ smokeHx           : chr [1:161136] "Ever" "Ever" "Ever" "Ever" ...
##  $ age_dx            : num [1:161136] 74 74 74 74 74 ...
##  $ status            : chr [1:161136] "1" "1" "1" "1" ...
##  $ deadORtx          : num [1:161136] 1 1 1 1 1 1 1 1 1 1 ...
##  $ dx                : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
##  $ dx_group          : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
##  $ dx_date           : Date[1:161136], format: "2016-05-20" "2016-05-20" ...
##  $ consent_date      : Date[1:161136], format: "2016-05-19" "2016-05-19" ...
##  $ censor_date       : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
##  $ tx_date           : Date[1:161136], format: NA NA ...
##  $ death_date        : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
##  $ deathORtx_date    : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
##  $ DeathTxCensor_date: Date[1:161136], format: "2016-06-11" "2016-06-11" ...
##  $ sample_date       : Date[1:161136], format: "2016-05-19" "2016-05-19" ...
##  $ fvc_date          : Date[1:161136], format: "2016-03-17" "2016-03-17" ...
##  $ dlco_date         : Date[1:161136], format: "2016-03-17" "2016-03-17" ...
##  $ fvc_pct           : num [1:161136] 58.5 58.5 58.5 58.5 58.5 ...
##  $ dlco_pct          : num [1:161136] 28.8 28.8 28.8 28.8 28.8 ...
##  $ fvc_timefromdx    : num [1:161136] -0.175 -0.175 -0.175 -0.175 -0.175 ...
##  $ dlco_timefromdx   : num [1:161136] -0.175 -0.175 -0.175 -0.175 -0.175 ...
##  $ Reason_Termination: chr [1:161136] "Death" "Death" "Death" "Death" ...
##  $ Death_ILD_Related : chr [1:161136] "Yes" "Yes" "Yes" "Yes" ...
##  $ same_zip          : logi [1:161136] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ zip               : chr [1:161136] "95405" "95405" "95405" "95405" ...
##  $ zip_new           : chr [1:161136] "95405" "95405" "95405" "95405" ...
##  $ state             : chr [1:161136] "CA" "CA" "CA" "CA" ...
##  $ major_city        : chr [1:161136] "Santa Rosa" "Santa Rosa" "Santa Rosa" "Santa Rosa" ...
##  $ time_censoring    : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
##  $ time_death        : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
##  $ time_tx           : num [1:161136] NA NA NA NA NA NA NA NA NA NA ...
##  $ time_deathORtx    : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
##  $ time_DeathTxCensor: num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
##  $ PM_5yrPreCensor   : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
##  $ PM_5yrPreDx       : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
##  $ PM5yrCensor_dich  : chr [1:161136] "Low" "Low" "Low" "Low" ...
##  $ PM5yr_dich        : chr [1:161136] "Low" "Low" "Low" "Low" ...
##  $ dx_IPF            : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
##  $ ruca              : num [1:161136] 1 1 1 1 1 1 1 1 1 1 ...
##  $ metro             : chr [1:161136] "metropolitan" "metropolitan" "metropolitan" "metropolitan" ...
##  $ site              : chr [1:161136] "02R" "02R" "02R" "02R" ...
##  $ PM_date           : Date[1:161136], format: "2000-01-01" "2000-01-01" ...
##  $ PM_5yrPreSamp     : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
##  $ PM_1yrPreSamp     : num [1:161136] 6.31 6.31 6.31 6.31 6.31 ...
##  $ PM_6moPreSamp     : num [1:161136] 6.6 6.6 6.6 6.6 6.6 ...
##  $ PM_3moPreSamp     : num [1:161136] 5.63 5.63 5.63 5.63 5.63 ...
##  $ PM_1moPreSamp     : num [1:161136] 6.35 6.35 6.35 6.35 6.35 ...
##  $ SO4_5yrPreSamp    : num [1:161136] 0.673 0.673 0.673 0.673 0.673 ...
##  $ SO4_1yrPreSamp    : num [1:161136] 0.475 0.475 0.475 0.475 0.475 ...
##  $ SO4_6moPreSamp    : num [1:161136] 0.417 0.417 0.417 0.417 0.417 ...
##  $ SO4_3moPreSamp    : num [1:161136] 0.567 0.567 0.567 0.567 0.567 ...
##  $ SO4_1moPreSamp    : num [1:161136] 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 ...
##  $ NO3_5yrPreSamp    : num [1:161136] 0.923 0.923 0.923 0.923 0.923 ...
##  $ NO3_1yrPreSamp    : num [1:161136] 0.8 0.8 0.8 0.8 0.8 ...
##  $ NO3_6moPreSamp    : num [1:161136] 0.883 0.883 0.883 0.883 0.883 ...
##  $ NO3_3moPreSamp    : num [1:161136] 0.533 0.533 0.533 0.533 0.533 ...
##  $ NO3_1moPreSamp    : num [1:161136] 0.6 0.6 0.6 0.6 0.6 ...
##  $ NH4_5yrPreSamp    : num [1:161136] 0.143 0.143 0.143 0.143 0.143 ...
##  $ NH4_1yrPreSamp    : num [1:161136] 0.0583 0.0583 0.0583 0.0583 0.0583 ...
##  $ NH4_6moPreSamp    : num [1:161136] 0.05 0.05 0.05 0.05 0.05 ...
##  $ NH4_3moPreSamp    : num [1:161136] 0.0333 0.0333 0.0333 0.0333 0.0333 ...
##  $ NH4_1moPreSamp    : num [1:161136] 0.05 0.05 0.05 0.05 0.05 ...
##  $ BC_5yrPreSamp     : num [1:161136] 0.738 0.738 0.738 0.738 0.738 ...
##  $ BC_1yrPreSamp     : num [1:161136] 0.575 0.575 0.575 0.575 0.575 ...
##  $ BC_6moPreSamp     : num [1:161136] 0.667 0.667 0.667 0.667 0.667 ...
##  $ BC_3moPreSamp     : num [1:161136] 0.367 0.367 0.367 0.367 0.367 ...
##  $ BC_1moPreSamp     : num [1:161136] 0.35 0.35 0.35 0.35 0.35 ...
##  $ OM_5yrPreSamp     : num [1:161136] 3.95 3.95 3.95 3.95 3.95 ...
##  $ OM_1yrPreSamp     : num [1:161136] 3.08 3.08 3.08 3.08 3.08 ...
##  $ OM_6moPreSamp     : num [1:161136] 3.52 3.52 3.52 3.52 3.52 ...
##  $ OM_3moPreSamp     : num [1:161136] 2.1 2.1 2.1 2.1 2.1 ...
##  $ OM_1moPreSamp     : num [1:161136] 2.1 2.1 2.1 2.1 2.1 ...
##  $ SS_date           : Date[1:161136], format: "2000-01-01" "2000-02-01" ...
##  $ value             : num [1:161136] 1.3 1.4 7.4 0.9 1.5 ...

9.1 Creating SS Exposure Variables

Now I’m creating new variables where I am matching up PM averages per year to years of major events for patients (year of diagnosis, year of death/lung transplant/censoring, etc)

9.1.1 5yrs Pre-Sampling

Here I am calculating the average SS value in the 5yrs prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(SS_5yrPreSamp = mean(value[ymd(SS_date)>=(ymd(sample_date) - years(5)) & 
                                        ymd(SS_date)<=ymd(sample_date)]))

9.1.2 1yr Pre-Sampling

Here I am calculating the average SS value in the 1yr prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(SS_1yrPreSamp = mean(value[ymd(SS_date)>=(ymd(sample_date) - years(1)) & 
                                        ymd(SS_date)<=ymd(sample_date)]))

9.1.3 6mo Pre-Sampling

Here I am calculating the average SS value in the 6mo prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(SS_6moPreSamp = mean(value[ymd(SS_date)>=(ymd(sample_date) - months(6)) & 
                                        ymd(SS_date)<=ymd(sample_date)]))

9.1.4 3mo Pre-Sampling

Here I am calculating the average SS value in the 3mo prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(SS_3moPreSamp = mean(value[ymd(SS_date)>=(ymd(sample_date) - months(3)) & 
                                        ymd(SS_date)<=ymd(sample_date)]))

9.1.5 1mo Pre-Sampling

Here I am calculating the average SS value in the 1mo prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(SS_1moPreSamp = mean(value[ymd(SS_date)>=(ymd(sample_date) - months(2)) & 
                                        ymd(SS_date)<=ymd(sample_date)]))

Of note, the 1mo pre-sampling with this method actually provides the average PM2.5 value for the month of sampling and the 1mo prior to sampling because the sampling could have occurred at the beginning or end of a month.

9.1.6 Remove Dataframes and variables not in use

rm(list=c("dnamx", "SS"))
dnam <- dnam %>% dplyr::select(!c(nrow, dist, lon, lat, value, SS_date))

9.1.7 Getting Rid of Duplicated IDs

Now that we have calculated the patient-specific PM exposures, I can get rid of all rows other than the first row for each patient.

dnam <-  dnam %>% 
  distinct_at(vars(ID), .keep_all=T)

This takes us down to 733 observations.

9.2 Soil add Original Soil Matching Files with All Months

outfile3 <- here("PM25_Data/PFF_fILD_2000_2017_soil_2021_11_05.xlsx")
Soil <- read_excel(outfile3)
dnam <- inner_join(Soil, dnam, by="ID")

Reorder so “ID” is the first column

dnam <- dnam %>% dplyr::select(ID, everything(.))

9.2.1 Pivoting to Long Format

First we need to convert the PM dataframe into the long rather than the wide format, which will allow us to use it more easily in R’s tidyverse as this is “tidy” formatting.

dnam <- dnam %>% 
  pivot_longer(cols=c(6:221), names_to="Soil_date", names_prefix="soil_", names_repair = "minimal")

9.2.2 Convert date PM_date to same format as above

dnamx <- dnam 
dnamx$Soil_date <- gsub("jan", "01-01-20", dnamx$Soil_date)
dnamx$Soil_date <- gsub("feb", "01-02-20", dnamx$Soil_date)
dnamx$Soil_date <- gsub("mar", "01-03-20", dnamx$Soil_date)
dnamx$Soil_date <- gsub("apr", "01-04-20", dnamx$Soil_date)
dnamx$Soil_date <- gsub("may", "01-05-20", dnamx$Soil_date)
dnamx$Soil_date <- gsub("jun", "01-06-20", dnamx$Soil_date)
dnamx$Soil_date <- gsub("jul", "01-07-20", dnamx$Soil_date)
dnamx$Soil_date <- gsub("aug", "01-08-20", dnamx$Soil_date)
dnamx$Soil_date <- gsub("sep", "01-09-20", dnamx$Soil_date)
dnamx$Soil_date <- gsub("oct", "01-10-20", dnamx$Soil_date)
dnamx$Soil_date <- gsub("nov", "01-11-20", dnamx$Soil_date)
dnamx$Soil_date <- gsub("dec", "01-12-20", dnamx$Soil_date)

dnamx$Soil_date <- format(as.Date(dnamx$Soil_date, format="%d-%m-%Y"),"%Y-%m-%d")
dnamx$Soil_date <- as.Date(dnamx$Soil_date)
dnam <- dnamx

10 Convert Date Columns to Date Format

Next I need to convert all date columns to proper format

dnam <- dnam %>% 
  mutate_at(c("Soil_date"), as.Date)
str(dnam)
## tibble [161,136 × 93] (S3: tbl_df/tbl/data.frame)
##  $ ID                : num [1:161136] 513 513 513 513 513 513 513 513 513 513 ...
##  $ nrow              : num [1:161136] 27492333 27492333 27492333 27492333 27492333 ...
##  $ dist              : num [1:161136] 0.00707 0.00707 0.00707 0.00707 0.00707 ...
##  $ lon               : num [1:161136] -123 -123 -123 -123 -123 ...
##  $ lat               : num [1:161136] 38.4 38.4 38.4 38.4 38.4 ...
##  $ SSID              : chr [1:161136] "02R0456" "02R0456" "02R0456" "02R0456" ...
##  $ sampID            : num [1:161136] 77900524 77900524 77900524 77900524 77900524 ...
##  $ exptID            : num [1:161136] 682 682 682 682 682 682 682 682 682 682 ...
##  $ run_date          : Date[1:161136], format: "2022-04-25" "2022-04-25" ...
##  $ plate             : num [1:161136] 17 17 17 17 17 17 17 17 17 17 ...
##  $ pct_5mC           : num [1:161136] 0.105 0.105 0.105 0.105 0.105 ...
##  $ sex               : chr [1:161136] "Male" "Male" "Male" "Male" ...
##  $ race              : chr [1:161136] "W" "W" "W" "W" ...
##  $ dich_Race         : chr [1:161136] "White" "White" "White" "White" ...
##  $ ethnicity         : chr [1:161136] "N" "N" "N" "N" ...
##  $ smokeHx           : chr [1:161136] "Ever" "Ever" "Ever" "Ever" ...
##  $ age_dx            : num [1:161136] 74 74 74 74 74 ...
##  $ status            : chr [1:161136] "1" "1" "1" "1" ...
##  $ deadORtx          : num [1:161136] 1 1 1 1 1 1 1 1 1 1 ...
##  $ dx                : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
##  $ dx_group          : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
##  $ dx_date           : Date[1:161136], format: "2016-05-20" "2016-05-20" ...
##  $ consent_date      : Date[1:161136], format: "2016-05-19" "2016-05-19" ...
##  $ censor_date       : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
##  $ tx_date           : Date[1:161136], format: NA NA ...
##  $ death_date        : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
##  $ deathORtx_date    : Date[1:161136], format: "2016-06-11" "2016-06-11" ...
##  $ DeathTxCensor_date: Date[1:161136], format: "2016-06-11" "2016-06-11" ...
##  $ sample_date       : Date[1:161136], format: "2016-05-19" "2016-05-19" ...
##  $ fvc_date          : Date[1:161136], format: "2016-03-17" "2016-03-17" ...
##  $ dlco_date         : Date[1:161136], format: "2016-03-17" "2016-03-17" ...
##  $ fvc_pct           : num [1:161136] 58.5 58.5 58.5 58.5 58.5 ...
##  $ dlco_pct          : num [1:161136] 28.8 28.8 28.8 28.8 28.8 ...
##  $ fvc_timefromdx    : num [1:161136] -0.175 -0.175 -0.175 -0.175 -0.175 ...
##  $ dlco_timefromdx   : num [1:161136] -0.175 -0.175 -0.175 -0.175 -0.175 ...
##  $ Reason_Termination: chr [1:161136] "Death" "Death" "Death" "Death" ...
##  $ Death_ILD_Related : chr [1:161136] "Yes" "Yes" "Yes" "Yes" ...
##  $ same_zip          : logi [1:161136] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ zip               : chr [1:161136] "95405" "95405" "95405" "95405" ...
##  $ zip_new           : chr [1:161136] "95405" "95405" "95405" "95405" ...
##  $ state             : chr [1:161136] "CA" "CA" "CA" "CA" ...
##  $ major_city        : chr [1:161136] "Santa Rosa" "Santa Rosa" "Santa Rosa" "Santa Rosa" ...
##  $ time_censoring    : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
##  $ time_death        : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
##  $ time_tx           : num [1:161136] NA NA NA NA NA NA NA NA NA NA ...
##  $ time_deathORtx    : num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
##  $ time_DeathTxCensor: num [1:161136] 0.063 0.063 0.063 0.063 0.063 ...
##  $ PM_5yrPreCensor   : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
##  $ PM_5yrPreDx       : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
##  $ PM5yrCensor_dich  : chr [1:161136] "Low" "Low" "Low" "Low" ...
##  $ PM5yr_dich        : chr [1:161136] "Low" "Low" "Low" "Low" ...
##  $ dx_IPF            : chr [1:161136] "IPF" "IPF" "IPF" "IPF" ...
##  $ ruca              : num [1:161136] 1 1 1 1 1 1 1 1 1 1 ...
##  $ metro             : chr [1:161136] "metropolitan" "metropolitan" "metropolitan" "metropolitan" ...
##  $ site              : chr [1:161136] "02R" "02R" "02R" "02R" ...
##  $ PM_date           : Date[1:161136], format: "2000-01-01" "2000-01-01" ...
##  $ PM_5yrPreSamp     : num [1:161136] 7.62 7.62 7.62 7.62 7.62 ...
##  $ PM_1yrPreSamp     : num [1:161136] 6.31 6.31 6.31 6.31 6.31 ...
##  $ PM_6moPreSamp     : num [1:161136] 6.6 6.6 6.6 6.6 6.6 ...
##  $ PM_3moPreSamp     : num [1:161136] 5.63 5.63 5.63 5.63 5.63 ...
##  $ PM_1moPreSamp     : num [1:161136] 6.35 6.35 6.35 6.35 6.35 ...
##  $ SO4_5yrPreSamp    : num [1:161136] 0.673 0.673 0.673 0.673 0.673 ...
##  $ SO4_1yrPreSamp    : num [1:161136] 0.475 0.475 0.475 0.475 0.475 ...
##  $ SO4_6moPreSamp    : num [1:161136] 0.417 0.417 0.417 0.417 0.417 ...
##  $ SO4_3moPreSamp    : num [1:161136] 0.567 0.567 0.567 0.567 0.567 ...
##  $ SO4_1moPreSamp    : num [1:161136] 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 0.75 ...
##  $ NO3_5yrPreSamp    : num [1:161136] 0.923 0.923 0.923 0.923 0.923 ...
##  $ NO3_1yrPreSamp    : num [1:161136] 0.8 0.8 0.8 0.8 0.8 ...
##  $ NO3_6moPreSamp    : num [1:161136] 0.883 0.883 0.883 0.883 0.883 ...
##  $ NO3_3moPreSamp    : num [1:161136] 0.533 0.533 0.533 0.533 0.533 ...
##  $ NO3_1moPreSamp    : num [1:161136] 0.6 0.6 0.6 0.6 0.6 ...
##  $ NH4_5yrPreSamp    : num [1:161136] 0.143 0.143 0.143 0.143 0.143 ...
##  $ NH4_1yrPreSamp    : num [1:161136] 0.0583 0.0583 0.0583 0.0583 0.0583 ...
##  $ NH4_6moPreSamp    : num [1:161136] 0.05 0.05 0.05 0.05 0.05 ...
##  $ NH4_3moPreSamp    : num [1:161136] 0.0333 0.0333 0.0333 0.0333 0.0333 ...
##  $ NH4_1moPreSamp    : num [1:161136] 0.05 0.05 0.05 0.05 0.05 ...
##  $ BC_5yrPreSamp     : num [1:161136] 0.738 0.738 0.738 0.738 0.738 ...
##  $ BC_1yrPreSamp     : num [1:161136] 0.575 0.575 0.575 0.575 0.575 ...
##  $ BC_6moPreSamp     : num [1:161136] 0.667 0.667 0.667 0.667 0.667 ...
##  $ BC_3moPreSamp     : num [1:161136] 0.367 0.367 0.367 0.367 0.367 ...
##  $ BC_1moPreSamp     : num [1:161136] 0.35 0.35 0.35 0.35 0.35 ...
##  $ OM_5yrPreSamp     : num [1:161136] 3.95 3.95 3.95 3.95 3.95 ...
##  $ OM_1yrPreSamp     : num [1:161136] 3.08 3.08 3.08 3.08 3.08 ...
##  $ OM_6moPreSamp     : num [1:161136] 3.52 3.52 3.52 3.52 3.52 ...
##  $ OM_3moPreSamp     : num [1:161136] 2.1 2.1 2.1 2.1 2.1 ...
##  $ OM_1moPreSamp     : num [1:161136] 2.1 2.1 2.1 2.1 2.1 ...
##  $ SS_5yrPreSamp     : num [1:161136] 1.34 1.34 1.34 1.34 1.34 ...
##  $ SS_1yrPreSamp     : num [1:161136] 1.1 1.1 1.1 1.1 1.1 ...
##  $ SS_6moPreSamp     : num [1:161136] 1.52 1.52 1.52 1.52 1.52 ...
##  $ SS_3moPreSamp     : num [1:161136] 2.07 2.07 2.07 2.07 2.07 ...
##  $ SS_1moPreSamp     : num [1:161136] 2.3 2.3 2.3 2.3 2.3 ...
##  $ Soil_date         : Date[1:161136], format: "2000-01-01" "2000-02-01" ...
##  $ value             : num [1:161136] 0.4 0.3 0.3 0.6 0.4 ...

10.1 Creating Soil Exposure Variables

Now I’m creating new variables where I am matching up PM averages per year to years of major events for patients (year of diagnosis, year of death/lung transplant/censoring, etc)

10.1.1 5yrs Pre-Sampling

Here I am calculating the average Soil value in the 5yrs prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(Soil_5yrPreSamp = mean(value[ymd(Soil_date)>=(ymd(sample_date) - years(5)) & 
                                        ymd(Soil_date)<=ymd(sample_date)]))

10.1.2 1yr Pre-Sampling

Here I am calculating the average Soil value in the 1yr prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(Soil_1yrPreSamp = mean(value[ymd(Soil_date)>=(ymd(sample_date) - years(1)) & 
                                        ymd(Soil_date)<=ymd(sample_date)]))

10.1.3 6mo Pre-Sampling

Here I am calculating the average Soil value in the 6mo prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(Soil_6moPreSamp = mean(value[ymd(Soil_date)>=(ymd(sample_date) - months(6)) & 
                                        ymd(Soil_date)<=ymd(sample_date)]))

10.1.4 3mo Pre-Sampling

Here I am calculating the average Soil value in the 3mo prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(Soil_3moPreSamp = mean(value[ymd(Soil_date)>=(ymd(sample_date) - months(3)) & 
                                        ymd(Soil_date)<=ymd(sample_date)]))

10.1.5 1mo Pre-Sampling

Here I am calculating the average Soil value in the 1mo prior to sampling.

dnam <- dnam %>% 
  group_by(ID) %>% 
  mutate(Soil_1moPreSamp = mean(value[ymd(Soil_date)>=(ymd(sample_date) - months(2)) & 
                                        ymd(Soil_date)<=ymd(sample_date)]))

Of note, the 1mo pre-sampling with this method actually provides the average PM2.5 value for the month of sampling and the 1mo prior to sampling because the sampling could have occurred at the beginning or end of a month.

10.1.6 Remove Dataframes and variables not in use

rm(list=c("dnamx", "Soil"))
dnam <- dnam %>% dplyr::select(!c(nrow, dist, lon, lat, value, Soil_date))

10.1.7 Getting Rid of Duplicated IDs

Now that we have calculated the patient-specific PM exposures, I can get rid of all rows other than the first row for each patient.

dnam <-  dnam %>% 
  distinct_at(vars(ID), .keep_all=T)

This takes us down to 733 observations.

10.2 Reorder columns

dnam <- dnam %>% select(ID, everything(.))

11 Export File with All PM2.5 and Constituent Time Frames Matched

write_xlsx(dnam, path="PFF_fILD_PM25andConstituentsMatched_forMethylFlash_2022_04_28.xlsx")

12 Releveling Factors

Here I am releveling factors so that they are in an intuitive order for my later analyses.

dnam$sex <- fct_relevel(dnam$sex, c("Male","Female"))
dnam$race <- fct_relevel(dnam$race, c("W","B","A","U"))
dnam$ethnicity <- fct_relevel(dnam$ethnicity, c("N","H","U"))
dnam$dich_Race <- fct_relevel(dnam$dich_Race, c("White","Non-White"))
dnam$smokeHx <- fct_relevel(dnam$smokeHx, c("Never","Ever"))

#For dx and dx_group, I just want IPF to be first and then the rest of the categories are alphabetical
dnam$dx <- fct_relevel(dnam$dx, c("IPF"))
dnam$dx_group <- fct_relevel(dnam$dx_group, c("IPF"))
dnam$dx_IPF <- fct_relevel(dnam$dx_IPF, c("IPF"))
#dnam$PM5yrCensor_dich <- fct_relevel(dnam$PM5yrCensor_dich, c("Low"))
#dnam$PM5yr_dich <- fct_relevel(dnam$PM5yr_dich, c("Low"))
#dnam$SO45yrCensor_dich <- fct_relevel(dnam$SO45yrCensor_dich, c("Low"))
#dnam$SO45yr_dich <- fct_relevel(dnam$SO45yr_dich, c("Low"))
#dnam$NO35yrCensor_dich <- fct_relevel(dnam$NO35yrCensor_dich, c("Low"))
#dnam$NO35yr_dich <- fct_relevel(dnam$NO35yr_dich, c("Low"))
#dnam$NH45yrCensor_dich <- fct_relevel(dnam$NH45yrCensor_dich, c("Low"))
#dnam$NH45yr_dich <- fct_relevel(dnam$NH45yr_dich, c("Low"))
#dnam$BC5yrCensor_dich <- fct_relevel(dnam$BC5yrCensor_dich, c("Low"))
#dnam$BC5yr_dich <- fct_relevel(dnam$BC5yr_dich, c("Low"))
#dnam$OM5yrCensor_dich <- fct_relevel(dnam$OM5yrCensor_dich, c("Low"))
#dnam$OM5yr_dich <- fct_relevel(dnam$OM5yr_dich, c("Low"))
#dnam$SS5yrCensor_dich <- fct_relevel(dnam$SS5yrCensor_dich, c("Low"))
#dnam$SS5yr_dich <- fct_relevel(dnam$SS5yr_dich, c("Low"))
#dnam$Soil5yrCensor_dich <- fct_relevel(dnam$Soil5yrCensor_dich, c("Low"))
#dnam$Soil5yr_dich <- fct_relevel(dnam$Soil5yr_dich, c("Low"))
str(dnam)
## grouped_df [746 × 92] (S3: grouped_df/tbl_df/tbl/data.frame)
##  $ ID                : num [1:746] 513 514 515 516 517 519 520 521 523 524 ...
##  $ SSID              : chr [1:746] "02R0456" "02R0462" "02R0468" "02R0471" ...
##  $ sampID            : num [1:746] 77900524 77902774 79049726 77903624 79002826 ...
##  $ exptID            : num [1:746] 682 324 495 274 267 245 748 15 714 246 ...
##  $ run_date          : Date[1:746], format: "2022-04-25" "2022-04-12" ...
##  $ plate             : num [1:746] 17 8 13 7 7 6 19 1 18 6 ...
##  $ pct_5mC           : num [1:746] 0.1049 0.1397 0.1322 0.0622 0.0492 ...
##  $ sex               : Factor w/ 2 levels "Male","Female": 1 1 1 2 1 1 1 1 1 1 ...
##  $ race              : Factor w/ 4 levels "W","B","A","U": 1 1 1 4 1 1 1 1 1 1 ...
##  $ dich_Race         : Factor w/ 2 levels "White","Non-White": 1 1 1 2 1 1 1 1 1 1 ...
##  $ ethnicity         : Factor w/ 3 levels "N","H","U": 1 1 1 1 1 1 3 1 1 1 ...
##  $ smokeHx           : Factor w/ 2 levels "Never","Ever": 2 2 1 2 2 2 1 1 1 2 ...
##  $ age_dx            : num [1:746] 74 74.2 83.6 74.5 72.3 ...
##  $ status            : chr [1:746] "1" "0" "0" "1" ...
##  $ deadORtx          : num [1:746] 1 0 0 1 0 0 0 0 0 0 ...
##  $ dx                : Factor w/ 1 level "IPF": 1 1 1 1 1 1 1 1 1 1 ...
##  $ dx_group          : Factor w/ 1 level "IPF": 1 1 1 1 1 1 1 1 1 1 ...
##  $ dx_date           : Date[1:746], format: "2016-05-20" "2016-07-08" ...
##  $ consent_date      : Date[1:746], format: "2016-05-19" "2016-07-07" ...
##  $ censor_date       : Date[1:746], format: "2016-06-11" "2016-07-11" ...
##  $ tx_date           : Date[1:746], format: NA NA ...
##  $ death_date        : Date[1:746], format: "2016-06-11" NA ...
##  $ deathORtx_date    : Date[1:746], format: "2016-06-11" NA ...
##  $ DeathTxCensor_date: Date[1:746], format: "2016-06-11" "2016-07-11" ...
##  $ sample_date       : Date[1:746], format: "2016-05-19" "2016-07-07" ...
##  $ fvc_date          : Date[1:746], format: "2016-03-17" "2016-04-14" ...
##  $ dlco_date         : Date[1:746], format: "2016-03-17" "2016-04-14" ...
##  $ fvc_pct           : num [1:746] 58.5 54.4 NA 81.4 97.2 ...
##  $ dlco_pct          : num [1:746] 28.8 32.7 NA 46.5 41.1 ...
##  $ fvc_timefromdx    : num [1:746] -0.175 -0.233 NA 0.832 -0.162 ...
##  $ dlco_timefromdx   : num [1:746] -0.175 -0.233 NA 0.832 -0.162 ...
##  $ Reason_Termination: chr [1:746] "Death" "Lost to Follow-up" NA "Death" ...
##  $ Death_ILD_Related : chr [1:746] "Yes" NA NA "Yes" ...
##  $ same_zip          : logi [1:746] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ zip               : chr [1:746] "95405" "95667" "94578" "94945" ...
##  $ zip_new           : chr [1:746] "95405" "95667" "94578" "94945" ...
##  $ state             : chr [1:746] "CA" "CA" "CA" "CA" ...
##  $ major_city        : chr [1:746] "Santa Rosa" "Placerville" "San Leandro" "Novato" ...
##  $ time_censoring    : num [1:746] 0.063 0.011 3.392 3.184 1.681 ...
##  $ time_death        : num [1:746] 0.063 NA NA 3.184 NA ...
##  $ time_tx           : num [1:746] NA NA NA NA NA NA NA NA NA NA ...
##  $ time_deathORtx    : num [1:746] 0.063 NA NA 3.184 NA ...
##  $ time_DeathTxCensor: num [1:746] 0.063 0.011 3.392 3.184 1.681 ...
##  $ PM_5yrPreCensor   : num [1:746] 7.62 5.87 10.82 8.52 5.17 ...
##  $ PM_5yrPreDx       : num [1:746] 7.62 5.87 9.72 8.27 5.48 ...
##  $ PM5yrCensor_dich  : chr [1:746] "Low" "Low" "High" "High" ...
##  $ PM5yr_dich        : chr [1:746] "Low" "Low" "High" "High" ...
##  $ dx_IPF            : Factor w/ 1 level "IPF": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ruca              : num [1:746] 1 4 1 1 4 1 1 1 1 1 ...
##  $ metro             : chr [1:746] "metropolitan" "micropolitan" "metropolitan" "metropolitan" ...
##  $ site              : chr [1:746] "02R" "02R" "02R" "02R" ...
##  $ PM_date           : Date[1:746], format: "2000-01-01" "2000-01-01" ...
##  $ PM_5yrPreSamp     : num [1:746] 7.62 5.87 9.28 7.97 5.48 ...
##  $ PM_1yrPreSamp     : num [1:746] 6.31 4.67 8.3 6.4 4.58 ...
##  $ PM_6moPreSamp     : num [1:746] 6.6 4.43 9.02 6 5.77 ...
##  $ PM_3moPreSamp     : num [1:746] 5.63 5.03 9.43 6.27 6.33 ...
##  $ PM_1moPreSamp     : num [1:746] 6.35 5.25 8.55 6.15 6.7 ...
##  $ SO4_5yrPreSamp    : num [1:746] 0.673 0.475 0.767 0.69 0.448 ...
##  $ SO4_1yrPreSamp    : num [1:746] 0.475 0.333 0.708 0.508 0.358 ...
##  $ SO4_6moPreSamp    : num [1:746] 0.417 0.367 0.5 0.6 0.467 ...
##  $ SO4_3moPreSamp    : num [1:746] 0.567 0.467 0.433 0.867 0.4 ...
##  $ SO4_1moPreSamp    : num [1:746] 0.75 0.5 0.5 0.9 0.35 ...
##  $ NO3_5yrPreSamp    : num [1:746] 0.923 0.545 1.563 1.032 0.307 ...
##  $ NO3_1yrPreSamp    : num [1:746] 0.8 0.408 1.183 0.825 0.225 ...
##  $ NO3_6moPreSamp    : num [1:746] 0.883 0.35 1.433 0.717 0.233 ...
##  $ NO3_3moPreSamp    : num [1:746] 0.533 0.233 1.6 0.6 0.3 ...
##  $ NO3_1moPreSamp    : num [1:746] 0.6 0.25 1.4 0.6 0.35 ...
##  $ NH4_5yrPreSamp    : num [1:746] 0.143 0.225 0.305 0.242 0.232 ...
##  $ NH4_1yrPreSamp    : num [1:746] 0.0583 0.15 0.2417 0.1417 0.2333 ...
##  $ NH4_6moPreSamp    : num [1:746] 0.05 0.1167 0.3167 0.0833 0.2333 ...
##  $ NH4_3moPreSamp    : num [1:746] 0.0333 0.0333 0.4 0.0667 0.1667 ...
##  $ NH4_1moPreSamp    : num [1:746] 0.05 0 0.3 0.05 0.2 ...
##  $ BC_5yrPreSamp     : num [1:746] 0.738 0.342 0.715 0.557 0.328 ...
##  $ BC_1yrPreSamp     : num [1:746] 0.575 0.3 0.525 0.517 0.225 ...
##  $ BC_6moPreSamp     : num [1:746] 0.667 0.2 0.6 0.383 0.283 ...
##  $ BC_3moPreSamp     : num [1:746] 0.367 0.233 0.667 0.333 0.367 ...
##  $ BC_1moPreSamp     : num [1:746] 0.35 0.25 0.55 0.35 0.45 ...
##  $ OM_5yrPreSamp     : num [1:746] 3.95 2.08 3.5 2.62 2.19 ...
##  $ OM_1yrPreSamp     : num [1:746] 3.08 2.02 3.12 2.46 2.02 ...
##  $ OM_6moPreSamp     : num [1:746] 3.52 1.47 3.8 2.1 2.9 ...
##  $ OM_3moPreSamp     : num [1:746] 2.1 1.93 4.23 2.13 4.1 ...
##  $ OM_1moPreSamp     : num [1:746] 2.1 2.35 3.55 2.25 4.9 ...
##  $ SS_5yrPreSamp     : num [1:746] 1.345 0.2567 1.8717 1.69 0.0717 ...
##  $ SS_1yrPreSamp     : num [1:746] 1.1 0.3083 1.7833 1.5583 0.0667 ...
##  $ SS_6moPreSamp     : num [1:746] 1.5167 0.35 1.1333 2.0667 0.0667 ...
##  $ SS_3moPreSamp     : num [1:746] 2.0667 0.3 1.2333 2 0.0667 ...
##  $ SS_1moPreSamp     : num [1:746] 2.3 0.25 0.9 1.85 0 ...
##  $ Soil_5yrPreSamp   : num [1:746] 0.443 0.39 0.475 0.353 0.512 ...
##  $ Soil_1yrPreSamp   : num [1:746] 0.433 0.275 0.467 0.317 0.525 ...
##  $ Soil_6moPreSamp   : num [1:746] 0.383 0.317 0.5 0.333 0.8 ...
##  $ Soil_3moPreSamp   : num [1:746] 0.4 0.433 0.267 0.433 1.033 ...
##  $ Soil_1moPreSamp   : num [1:746] 0.5 0.5 0.25 0.4 1.25 ...
##  - attr(*, "groups")= tibble [746 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ ID   : num [1:746] 513 514 515 516 517 519 520 521 523 524 ...
##   ..$ .rows: list<int> [1:746] 
##   .. ..$ : int 1
##   .. ..$ : int 2
##   .. ..$ : int 3
##   .. ..$ : int 4
##   .. ..$ : int 5
##   .. ..$ : int 6
##   .. ..$ : int 7
##   .. ..$ : int 8
##   .. ..$ : int 9
##   .. ..$ : int 10
##   .. ..$ : int 11
##   .. ..$ : int 12
##   .. ..$ : int 13
##   .. ..$ : int 14
##   .. ..$ : int 15
##   .. ..$ : int 16
##   .. ..$ : int 17
##   .. ..$ : int 18
##   .. ..$ : int 19
##   .. ..$ : int 20
##   .. ..$ : int 21
##   .. ..$ : int 22
##   .. ..$ : int 23
##   .. ..$ : int 24
##   .. ..$ : int 25
##   .. ..$ : int 26
##   .. ..$ : int 27
##   .. ..$ : int 28
##   .. ..$ : int 29
##   .. ..$ : int 30
##   .. ..$ : int 31
##   .. ..$ : int 32
##   .. ..$ : int 33
##   .. ..$ : int 34
##   .. ..$ : int 35
##   .. ..$ : int 36
##   .. ..$ : int 37
##   .. ..$ : int 38
##   .. ..$ : int 39
##   .. ..$ : int 40
##   .. ..$ : int 41
##   .. ..$ : int 42
##   .. ..$ : int 43
##   .. ..$ : int 44
##   .. ..$ : int 45
##   .. ..$ : int 46
##   .. ..$ : int 47
##   .. ..$ : int 48
##   .. ..$ : int 49
##   .. ..$ : int 50
##   .. ..$ : int 51
##   .. ..$ : int 52
##   .. ..$ : int 53
##   .. ..$ : int 54
##   .. ..$ : int 55
##   .. ..$ : int 56
##   .. ..$ : int 57
##   .. ..$ : int 58
##   .. ..$ : int 59
##   .. ..$ : int 60
##   .. ..$ : int 61
##   .. ..$ : int 62
##   .. ..$ : int 63
##   .. ..$ : int 64
##   .. ..$ : int 65
##   .. ..$ : int 66
##   .. ..$ : int 67
##   .. ..$ : int 68
##   .. ..$ : int 69
##   .. ..$ : int 70
##   .. ..$ : int 71
##   .. ..$ : int 72
##   .. ..$ : int 73
##   .. ..$ : int 74
##   .. ..$ : int 75
##   .. ..$ : int 76
##   .. ..$ : int 77
##   .. ..$ : int 78
##   .. ..$ : int 79
##   .. ..$ : int 80
##   .. ..$ : int 81
##   .. ..$ : int 82
##   .. ..$ : int 83
##   .. ..$ : int 84
##   .. ..$ : int 85
##   .. ..$ : int 86
##   .. ..$ : int 87
##   .. ..$ : int 88
##   .. ..$ : int 89
##   .. ..$ : int 90
##   .. ..$ : int 91
##   .. ..$ : int 92
##   .. ..$ : int 93
##   .. ..$ : int 94
##   .. ..$ : int 95
##   .. ..$ : int 96
##   .. ..$ : int 97
##   .. ..$ : int 98
##   .. ..$ : int 99
##   .. .. [list output truncated]
##   .. ..@ ptype: int(0) 
##   ..- attr(*, ".drop")= logi TRUE

13 Creating New Variables

13.1 Time between Registry Enrollment and Sample Date

Need to know if consent_date is a reasonable time point to be basing our DNAm results from

dnam <- dnam %>% mutate(time_sample=(sample_date-consent_date)/365.25)
dnam$time_sample <- as.numeric(dnam$time_sample)

14 Proportion %5mC

Will need to use the actual proportion of 5mC for beta regression analyses where values must be between 0 and 1

dnam <- dnam %>% mutate(prop_5mC=pct_5mC/100)

15 Exploratory Data Analysis

This is a function that allows me to make tables which summarize the count and percentages of each level of factor variables

n_prop_tbl <- function(x) {
  tbl <- table(x)
  res <- cbind(tbl, round(prop.table(tbl)*100,2))
  colnames(res) <-  c('Count', 'Percentage')
  res
}

15.1 PM Breakdown

print("PM2.5")
## [1] "PM2.5"
summary(dnam$PM_5yrPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   3.053   7.395   8.280   8.237   9.098  16.142       1
summary(dnam$PM_1yrPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   2.642   6.708   7.608   7.526   8.308  15.175       1
summary(dnam$PM_6moPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   2.500   6.600   7.617   7.571   8.500  17.967      22
summary(dnam$PM_3moPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   1.967   6.367   7.433   7.506   8.500  20.200      12
summary(dnam$PM_1moPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   1.900   6.300   7.350   7.525   8.700  23.000       6
print("/nSO4")
## [1] "/nSO4"
summary(dnam$SO4_5yrPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.280   1.233   1.542   1.444   1.733   2.445       1
summary(dnam$SO4_1yrPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.1750  0.8833  1.1333  1.0955  1.2875  1.9333       5
summary(dnam$SO4_6moPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.150   0.850   1.100   1.063   1.283   1.950      40
summary(dnam$SO4_3moPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.1667  0.8333  1.0667  1.0615  1.3000  2.1333     125
summary(dnam$SO4_1moPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.10    0.80    1.05    1.06    1.30    2.25     143
print("/nNO3")
## [1] "/nNO3"
summary(dnam$NO3_5yrPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## 0.07167 0.49000 0.83667 0.90407 1.31042 4.26333       1
summary(dnam$NO3_1yrPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.0750  0.4000  0.6583  0.7423  1.0083  3.4667       5
summary(dnam$NO3_6moPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.0800  0.4000  0.6000  0.7996  0.9833  5.5000      40
summary(dnam$NO3_3moPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.0000  0.3000  0.5333  0.7497  0.9000  7.8000     125
summary(dnam$NO3_1moPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.0000  0.3000  0.5000  0.7328  0.9000  7.4000     143
print("/nNH4")
## [1] "/nNH4"
summary(dnam$NH4_5yrPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## 0.02833 0.32167 0.45500 0.47358 0.60833 1.38000       1
summary(dnam$NH4_1yrPreSamp)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
## 0.008333 0.169231 0.250000 0.274247 0.358333 1.066667        5
summary(dnam$NH4_6moPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.0000  0.1667  0.2500  0.2849  0.3500  1.9667      40
summary(dnam$NH4_3moPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.0000  0.1333  0.2333  0.2700  0.3333  2.5000     125
summary(dnam$NH4_1moPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.0000  0.1500  0.2000  0.2623  0.3500  2.2000     143
print("/nBC")
## [1] "/nBC"
summary(dnam$BC_5yrPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.1317  0.4967  0.6133  0.6134  0.7167  1.9650       1
summary(dnam$BC_1yrPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## 0.08461 0.47500 0.60000 0.59388 0.70833 1.28571       5
summary(dnam$BC_6moPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.0500  0.4667  0.6000  0.6240  0.7500  1.8250      40
summary(dnam$BC_3moPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## 0.03333 0.43333 0.60000 0.58884 0.73333 1.90000     125
summary(dnam$BC_1moPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.0000  0.4000  0.6000  0.5854  0.7500  2.0000     143
print("/nOM")
## [1] "/nOM"
summary(dnam$OM_5yrPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.7333  2.2650  2.7333  2.8295  3.4267  6.9350       1
summary(dnam$OM_1yrPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.8077  2.3500  2.9583  3.0226  3.5667  8.7444       5
summary(dnam$OM_6moPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.3167  2.3500  3.0333  3.1523  3.8000 12.6250      40
summary(dnam$OM_3moPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.300   2.200   2.967   3.028   3.700  10.133     125
summary(dnam$OM_1moPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.200   2.142   2.850   3.042   3.750  10.500     143
print("/nSS")
## [1] "/nSS"
summary(dnam$SS_5yrPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## 0.01333 0.18833 0.24167 0.35948 0.36833 2.52034       1
summary(dnam$SS_1yrPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.0000  0.1833  0.2444  0.3898  0.3923  3.2714       5
summary(dnam$SS_6moPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.0000  0.1333  0.2333  0.3821  0.4167  4.3667      40
summary(dnam$SS_3moPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.0000  0.1333  0.2333  0.3786  0.4333  4.9333     125
summary(dnam$SS_1moPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.0000  0.1000  0.2000  0.3902  0.4500  5.5000     143
print("/nSoil")
## [1] "/nSoil"
summary(dnam$Soil_5yrPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## 0.06833 0.35500 0.49333 0.60401 0.72679 2.42833       1
summary(dnam$Soil_1yrPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## 0.05833 0.34167 0.51667 0.58605 0.72500 2.39167       5
summary(dnam$Soil_6moPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## 0.03333 0.31667 0.45833 0.57793 0.68333 2.95000      40
summary(dnam$Soil_3moPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## 0.03333 0.30000 0.46667 0.59090 0.66667 5.36667     125
summary(dnam$Soil_1moPreSamp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.000   0.300   0.450   0.593   0.700   7.100     143

15.2 Sex Breakdown

n_prop_tbl(dnam$sex)
##        Count Percentage
## Male     559      74.93
## Female   187      25.07

15.3 Race and Ethnicity Breakdown

n_prop_tbl(dnam$race)
##   Count Percentage
## W   706      94.64
## B     7       0.94
## A    20       2.68
## U    13       1.74
n_prop_tbl(dnam$ethnicity)
##   Count Percentage
## N   702      94.10
## H    29       3.89
## U    15       2.01

15.4 Smoking History Breakdown

n_prop_tbl(dnam$smokeHx)
##       Count Percentage
## Never   267      35.79
## Ever    479      64.21

15.5 Diagnostic Group Breakdown

n_prop_tbl(dnam$dx_group)
##     Count Percentage
## IPF   746        100

15.6 State Breakdown

n_prop_tbl(dnam$state)
##    Count Percentage
## AL    59       7.92
## AR     2       0.27
## AZ    29       3.89
## CA    57       7.65
## CO     7       0.94
## CT    21       2.82
## DC     3       0.40
## DE     3       0.40
## FL    21       2.82
## GA    36       4.83
## IA     2       0.27
## ID     2       0.27
## IL    22       2.95
## IN     6       0.81
## KS     7       0.94
## KY    15       2.01
## LA    17       2.28
## MA     9       1.21
## MD    18       2.42
## ME     1       0.13
## MI    45       6.04
## MN    29       3.89
## MO    14       1.88
## MS    12       1.61
## MT     1       0.13
## NC    23       3.09
## ND     1       0.13
## NE     1       0.13
## NH     6       0.81
## NJ    14       1.88
## NM     3       0.40
## NV     3       0.40
## NY    63       8.46
## OH    28       3.76
## PA    41       5.50
## SC    14       1.88
## SD     1       0.13
## TN    14       1.88
## TX    50       6.71
## UT     6       0.81
## VA    35       4.70
## WI     2       0.27
## WV     2       0.27

15.7 Metropolitan Breakdown

n_prop_tbl(dnam$metro)
##              Count Percentage
## metropolitan   627      84.16
## micropolitan    63       8.46
## rural           55       7.38

Overwhelming majority of the patients are considered “metropolitan”.

15.8 Age at Diagnosis Breakdown

shapiro.test(dnam$age_dx)
## 
##  Shapiro-Wilk normality test
## 
## data:  dnam$age_dx
## W = 0.96994, p-value = 2.961e-11
#Shapiro tests tells us that age_dx is not normally distributed, so should report median, IQR
summary(dnam$age_dx)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   18.59   64.15   69.37   68.69   73.95   91.78

Not normally distributed.

15.9 Vital Status Breakdown

n_prop_tbl(dnam$status)
##   Count Percentage
## 0   429      57.51
## 1   213      28.55
## 2   104      13.94

15.10 Baseline Lung Function Breakdown

shapiro.test(dnam$fvc_pct)
## 
##  Shapiro-Wilk normality test
## 
## data:  dnam$fvc_pct
## W = 0.99413, p-value = 0.009972
summary(dnam$fvc_pct)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   23.55   55.28   67.10   68.05   79.70  120.43      68
sd(dnam$fvc_pct, na.rm=T)
## [1] 16.6119
shapiro.test(dnam$dlco_pct)
## 
##  Shapiro-Wilk normality test
## 
## data:  dnam$dlco_pct
## W = 0.92974, p-value < 2.2e-16
summary(dnam$dlco_pct)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    8.51   30.12   39.60   40.74   49.39  168.98     108
sd(dnam$dlco_pct, na.rm=T)
## [1] 15.33611

15.11 Time to Censoring Breakdown

summary(dnam$time_DeathTxCensor)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   1.317   2.359   2.323   3.330   5.060
sd(dnam$time_DeathTxCensor, na.rm=T)
## [1] 1.298347

15.12 Time between registry enrollment and sample collection

summary(dnam$time_sample)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -2.00137  0.00000  0.00000  0.01827  0.00000  1.39904
sd(dnam$time_sample, na.rm=T)
## [1] 0.1428165

This indicates that the majority of samples are taken very close to the time of consent, which is our reference for the _5yrPreDx periods for exposure matching, although there are a few where samples were taken far before or far after the consent_date.

16 Visual Exploratory Data Analysis

16.1 Histograms

Histogram of %5mC

(dnam %>% ggplot(aes(x=pct_5mC))+
   geom_histogram(fill="blue", color="black")+
   labs(x="% 5mC", y="Number of patients with IPF", title="% 5mC in Patients with IPF")+
   theme(plot.title = element_text(hjust = 0.5)))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

We have some very high outliers, whreas the rest is generally <0.3%

Histogram of %5mC

(dnam %>% ggplot(aes(x=pct_5mC))+
   geom_histogram(fill="blue", color="black")+
   labs(x="% 5mC", y="Number of patients with IPF", title="% 5mC in Patients with IPF")+
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0,0.5))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 10 rows containing non-finite values (stat_bin).
## Warning: Removed 2 rows containing missing values (geom_bar).

Slightly right-skewed distribution.

16.2 Scatterplots

16.2.1 PM2.5 Scatterplots

Scatterplot of continuous PM2.5 5yrs Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=PM_5yrPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="PM2.5 in 5yrs Pre-Sampling", y="% 5mC", title="%5mC by PM2.5 5yrs Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0,20)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 11 rows containing non-finite values (stat_smooth).
## Warning: Removed 11 rows containing missing values (geom_point).

Scatterplot of continuous PM2.5 1yr Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=PM_1yrPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="PM2.5 in 1yr Pre-Sampling", y="% 5mC", title="%5mC by PM2.5 1yr Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0,20)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 11 rows containing non-finite values (stat_smooth).
## Warning: Removed 11 rows containing missing values (geom_point).

Scatterplot of continuous PM2.5 6mo Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=PM_6moPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="PM2.5 in 6mo Pre-Sampling", y="% 5mC", title="%5mC by PM2.5 6mo Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0,20)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 31 rows containing non-finite values (stat_smooth).
## Warning: Removed 31 rows containing missing values (geom_point).

Scatterplot of continuous PM2.5 3mo Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=PM_3moPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="PM2.5 in 3mo Pre-Sampling", y="% 5mC", title="%5mC by PM2.5 3mo Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0,20)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 23 rows containing non-finite values (stat_smooth).
## Warning: Removed 23 rows containing missing values (geom_point).

Scatterplot of continuous PM2.5 1mo Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=PM_1moPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="PM2.5 in 1mo Pre-Sampling", y="% 5mC", title="%5mC by PM2.5 1mo Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0,20)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 17 rows containing non-finite values (stat_smooth).
## Warning: Removed 17 rows containing missing values (geom_point).

16.3 SO4 Scatterplots

Scatterplot of continuous SO4 5yrs Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=SO4_5yrPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="SO4 in 5yrs Pre-Sampling", y="% 5mC", title="%5mC by SO4 5yrs Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0,2.5)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 11 rows containing non-finite values (stat_smooth).
## Warning: Removed 11 rows containing missing values (geom_point).

Scatterplot of continuous SO4 1yr Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=PM_1yrPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="SO4 in 1yr Pre-Sampling", y="% 5mC", title="%5mC by SO4 1yr Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0,20)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 11 rows containing non-finite values (stat_smooth).
## Warning: Removed 11 rows containing missing values (geom_point).

Scatterplot of continuous SO4 6mo Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=SO4_6moPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="SO4 in 6mo Pre-Sampling", y="% 5mC", title="%5mC by SO4 6mo Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0,2.5)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 49 rows containing non-finite values (stat_smooth).
## Warning: Removed 49 rows containing missing values (geom_point).

Scatterplot of continuous SO4 3mo Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=SO4_3moPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="SO4 in 3mo Pre-Sampling", y="% 5mC", title="%5mC by SO4 3mo Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0,2.5)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 135 rows containing non-finite values (stat_smooth).
## Warning: Removed 135 rows containing missing values (geom_point).

Scatterplot of continuous SO4 1mo Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=SO4_1moPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="SO4 in 1mo Pre-Sampling", y="% 5mC", title="%5mC by SO4 1mo Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0,2.5)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 153 rows containing non-finite values (stat_smooth).
## Warning: Removed 153 rows containing missing values (geom_point).

16.3.1 NO3 Scatterplots

Scatterplot of continuous NO3 5yrs Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=NO3_5yrPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="NO3 in 5yrs Pre-Sampling", y="% 5mC", title="%5mC by NO3 5yrs Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0,5)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 11 rows containing non-finite values (stat_smooth).
## Warning: Removed 11 rows containing missing values (geom_point).

Scatterplot of continuous NO3 1yr Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=NO3_1yrPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="NO3 in 1yrs Pre-Sampling", y="% 5mC", title="%5mC by NO3 1yrs Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0,5)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).

Scatterplot of continuous NO3 6mo Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=NO3_6moPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="NO3 in 6mo Pre-Sampling", y="% 5mC", title="%5mC by NO3 6mo Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0,5)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 50 rows containing non-finite values (stat_smooth).
## Warning: Removed 50 rows containing missing values (geom_point).

Scatterplot of continuous NO3 3mo Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=NO3_3moPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="NO3 in 3mo Pre-Sampling", y="% 5mC", title="%5mC by NO3 3mo Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0,5)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 137 rows containing non-finite values (stat_smooth).
## Warning: Removed 137 rows containing missing values (geom_point).

Scatterplot of continuous NO3 1mo Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=NO3_1moPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="NO3 in 1mo Pre-Sampling", y="% 5mC", title="%5mC by NO3 1mo Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0,5)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 154 rows containing non-finite values (stat_smooth).
## Warning: Removed 154 rows containing missing values (geom_point).

16.3.2 NH4 Scatterplots

Scatterplot of continuous NH4 5yrs Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=NH4_5yrPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="NH4 in 5yrs Pre-Sampling", y="% 5mC", title="%5mC by NH4 5yrs Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0, 1.5)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 11 rows containing non-finite values (stat_smooth).
## Warning: Removed 11 rows containing missing values (geom_point).

Scatterplot of continuous NH4 1yr Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=NH4_1yrPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="NH4 in 1yr Pre-Sampling", y="% 5mC", title="%5mC by NH4 1yr Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0, 1.5)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).

Scatterplot of continuous NH4 6mo Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=NH4_6moPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="NH4 in 6mo Pre-Sampling", y="% 5mC", title="%5mC by NH4 6mo Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0,2)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 49 rows containing non-finite values (stat_smooth).
## Warning: Removed 49 rows containing missing values (geom_point).

Scatterplot of continuous NH4 3mo Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=NH4_3moPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="NH4 in 3mo Pre-Sampling", y="% 5mC", title="%5mC by NH4 3mo Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0,2)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 136 rows containing non-finite values (stat_smooth).
## Warning: Removed 136 rows containing missing values (geom_point).

Scatterplot of continuous NH4 1mo Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=NH4_1moPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="NH4 in 1mo Pre-Sampling", y="% 5mC", title="%5mC by NH4 1mo Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0,2)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 154 rows containing non-finite values (stat_smooth).
## Warning: Removed 154 rows containing missing values (geom_point).

16.3.3 BC Scatterplots

Scatterplot of continuous BC 5yrs Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=BC_5yrPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="BC in 5yrs Pre-Sampling", y="% 5mC", title="%5mC by BC 5yrs Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0, 2)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 11 rows containing non-finite values (stat_smooth).
## Warning: Removed 11 rows containing missing values (geom_point).

Scatterplot of continuous BC 1yr Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=BC_1yrPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="BC in 1yr Pre-Sampling", y="% 5mC", title="%5mC by BC 1yr Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0, 2)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).

Scatterplot of continuous BC 6mo Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=BC_6moPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="BC in 6mo Pre-Sampling", y="% 5mC", title="%5mC by BC 6mo Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0,2)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 49 rows containing non-finite values (stat_smooth).
## Warning: Removed 49 rows containing missing values (geom_point).

Scatterplot of continuous BC 3mo Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=BC_3moPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="BC in 3mo Pre-Sampling", y="% 5mC", title="%5mC by BC 3mo Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0,2)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 135 rows containing non-finite values (stat_smooth).
## Warning: Removed 135 rows containing missing values (geom_point).

Scatterplot of continuous BC 1mo Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=BC_1moPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="BC in 1mo Pre-Sampling", y="% 5mC", title="%5mC by BC 1mo Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0,2)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 153 rows containing non-finite values (stat_smooth).
## Warning: Removed 153 rows containing missing values (geom_point).

16.3.4 OM Scatterplots

Scatterplot of continuous OM 5yrs Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=OM_5yrPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="OM in 5yrs Pre-Sampling", y="% 5mC", title="%5mC by OM 5yrs Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0, 8)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 11 rows containing non-finite values (stat_smooth).
## Warning: Removed 11 rows containing missing values (geom_point).

Scatterplot of continuous OM 1yr Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=OM_1yrPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="OM in 1yr Pre-Sampling", y="% 5mC", title="%5mC by OM 1yr Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0, 8)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 16 rows containing non-finite values (stat_smooth).
## Warning: Removed 16 rows containing missing values (geom_point).

Scatterplot of continuous OM 6mo Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=OM_6moPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="OM in 6mo Pre-Sampling", y="% 5mC", title="%5mC by OM 6mo Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0,10)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 50 rows containing non-finite values (stat_smooth).
## Warning: Removed 50 rows containing missing values (geom_point).

Scatterplot of continuous OM 3mo Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=OM_3moPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="OM in 3mo Pre-Sampling", y="% 5mC", title="%5mC by OM 3mo Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0,10)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 136 rows containing non-finite values (stat_smooth).
## Warning: Removed 136 rows containing missing values (geom_point).

Scatterplot of continuous OM 1mo Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=OM_1moPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="OM in 1mo Pre-Sampling", y="% 5mC", title="%5mC by OM 1mo Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0,10)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 154 rows containing non-finite values (stat_smooth).
## Warning: Removed 154 rows containing missing values (geom_point).

16.4 SS Scatterplots

Scatterplot of continuous SS 5yrs Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=SS_5yrPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="SS in 5yrs Pre-Sampling", y="% 5mC", title="%5mC by SS 5yrs Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0, 2.5)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing missing values (geom_point).

Scatterplot of continuous SS 1yr Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=SS_1yrPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="SS in 1yr Pre-Sampling", y="% 5mC", title="%5mC by SS 1yr Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0, 2.5)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 25 rows containing non-finite values (stat_smooth).
## Warning: Removed 25 rows containing missing values (geom_point).

Scatterplot of continuous SS 6mo Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=SS_6moPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="SS in 6mo Pre-Sampling", y="% 5mC", title="%5mC by SS 6mo Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0,5)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 49 rows containing non-finite values (stat_smooth).
## Warning: Removed 49 rows containing missing values (geom_point).

Scatterplot of continuous SS 3mo Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=SS_3moPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="SS in 3mo Pre-Sampling", y="% 5mC", title="%5mC by SS 3mo Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0,5)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 135 rows containing non-finite values (stat_smooth).
## Warning: Removed 135 rows containing missing values (geom_point).

Scatterplot of continuous SS 1mo Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=SS_1moPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="SS in 1mo Pre-Sampling", y="% 5mC", title="%5mC by SS 1mo Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0,5)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 154 rows containing non-finite values (stat_smooth).
## Warning: Removed 154 rows containing missing values (geom_point).

16.4.1 Soil Scatterplots

Scatterplot of continuous Soil 5yrs Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=Soil_5yrPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="Soil in 5yrs Pre-Sampling", y="% 5mC", title="%5mC by Soil 5yrs Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0, 3)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 11 rows containing non-finite values (stat_smooth).
## Warning: Removed 11 rows containing missing values (geom_point).

Scatterplot of continuous Soil 1yr Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=Soil_1yrPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="Soil in 1yr Pre-Sampling", y="% 5mC", title="%5mC by Soil 1yr Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0, 3)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).

Scatterplot of continuous Soil 6mo Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=Soil_6moPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="Soil in 6mo Pre-Sampling", y="% 5mC", title="%5mC by Soil 6mo Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0,3)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 49 rows containing non-finite values (stat_smooth).
## Warning: Removed 49 rows containing missing values (geom_point).

Scatterplot of continuous Soil 3mo Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=Soil_3moPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="Soil in 3mo Pre-Sampling", y="% 5mC", title="%5mC by Soil 3mo Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0,3)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 136 rows containing non-finite values (stat_smooth).
## Warning: Removed 136 rows containing missing values (geom_point).

Scatterplot of continuous Soil 1mo Pre-Sampling vs %5mC

(dnam %>% ggplot(aes(x=Soil_1moPreSamp, y=pct_5mC))+
   geom_point()+
   labs(x="Soil in 1mo Pre-Sampling", y="% 5mC", title="%5mC by Soil 1mo Pre-Sampling")+
   geom_smooth(method="lm", se = FALSE) +
   theme(plot.title = element_text(hjust = 0.5))+
   xlim(0,3)+
   ylim(0, 0.5))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 155 rows containing non-finite values (stat_smooth).
## Warning: Removed 155 rows containing missing values (geom_point).

16.4.2 Violin Plots

Violin plot wrapping boxplot to visualize NO3 5yrs Pre-Sampling low vs high vs %5mC

#dnamb <- dnam %>% filter(!is.na(NO35yrCensor_dich))
#(dnamb %>% ggplot(aes(x=NO35yrCensor_dich, y=pct_5mC, fill=NO35yrCensor_dich))+
 #  geom_boxplot(width=0.2, color="black", alpha=1.0)+
  # geom_violin(width=1.0, alpha=0.5)+
  # labs(x="NO3 5yrs Pre-Sampling Low vs High", y="% 5mC", title="% 5mC by NO3 5yrs Pre-Sampling Low vs High")+
  # theme_light()+
  # theme(legend.position = "none", plot.title = element_text(hjust = 0.5))+
  # scale_fill_brewer(type="seq", palette="YlOrRd")+
  # ylim(0,0.25))

Violin plot wrapping boxplot to visualize NO3 5yrs pre-dx low vs high vs %5mC

#dnamb <- dnam %>% filter(!is.na(NO35yr_dich))
#(dnamb %>% ggplot(aes(x=NO35yr_dich, y=pct_5mC, fill=NO35yr_dich))+
#  geom_boxplot(width=0.2, color="black", alpha=1.0)+
#   geom_violin(width=1.0, alpha=0.5)+
#   labs(x="NO3 1yr Pre-Sampling Low vs High", y="% 5mC", title="% 5mC by NO3 1yr Pre-Sampling Low vs High")+
#  theme_light()+
#   theme(legend.position = "none", plot.title = element_text(hjust = 0.5))+
#   scale_fill_brewer(type="seq", palette="YlOrRd")+
#   ylim(0,0.25))

Violin plot wrapping boxplot to visualize NH4 5yrs Pre-Sampling low vs high vs %5mC

#dnamb <- dnam %>% filter(!is.na(NH45yrCensor_dich))
#(dnamb %>% ggplot(aes(x=NH45yrCensor_dich, y=pct_5mC, fill=NH45yrCensor_dich))+
#   geom_boxplot(width=0.2, color="black", alpha=1.0)+
#   geom_violin(width=1.0, alpha=0.5)+
#   labs(x="NH4 5yrs Pre-Sampling Low vs High", y="% 5mC", title="% 5mC by NH4 5yrs Pre-Sampling Low vs High")+
#   theme_light()+
#   theme(legend.position = "none", plot.title = element_text(hjust = 0.5))+
#   scale_fill_brewer(type="seq", palette="YlOrRd")+
#   ylim(0,0.25))

Violin plot wrapping boxplot to visualize NH4 5yrs pre-dx low vs high vs %5mC

#dnamb <- dnam %>% filter(!is.na(NH45yr_dich))
#(dnamb %>% ggplot(aes(x=NH45yr_dich, y=pct_5mC, fill=NH45yr_dich))+
#   geom_boxplot(width=0.2, color="black", alpha=1.0)+
#   geom_violin(width=1.0, alpha=0.5)+
#   labs(x="NH4 1yr Pre-Sampling Low vs High", y="% 5mC", title="% 5mC by NH4 1yr Pre-Sampling Low vs High")+
#   theme_light()+
#   theme(legend.position = "none", plot.title = element_text(hjust = 0.5))+
#   scale_fill_brewer(type="seq", palette="YlOrRd")+
#   ylim(0,0.25))

17 Linear Regression Analyses

17.1 PM2.5 Models

###Continuous PM2.5 5yr Pre-Censor

dnam_model1 <- betareg(prop_5mC ~ PM_5yrPreSamp, dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ PM_5yrPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0934 -0.3967 -0.0721  0.2850  5.3443 
## 
## Coefficients (mean model with logit link):
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.81077    0.11701 -58.209   <2e-16 ***
## PM_5yrPreSamp  0.01314    0.01389   0.946    0.344    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2142.6      115.4   18.57   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4401 on 3 Df
## Pseudo R-squared: 0.002777
## Number of iterations: 215 (BFGS) + 5 (Fisher scoring)
confint(dnam_model1)
##                       2.5 %        97.5 %
## (Intercept)     -7.04010171   -6.58144546
## PM_5yrPreSamp   -0.01407672    0.04036428
## (phi)         1916.52576149 2368.69237872

Impact of sex on model

dnam_model1 <- betareg(prop_5mC ~ PM_5yrPreSamp + sex, dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ PM_5yrPreSamp + sex, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0824 -0.4035 -0.0728  0.2854  5.3383 
## 
## Coefficients (mean model with logit link):
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.81827    0.11803 -57.768   <2e-16 ***
## PM_5yrPreSamp  0.01338    0.01389   0.963    0.336    
## sexFemale      0.02206    0.04717   0.468    0.640    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2143.3      115.4   18.57   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4401 on 4 Df
## Pseudo R-squared: 0.003426
## Number of iterations: 720 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
##                       2.5 %        97.5 %
## (Intercept)     -7.04960352   -6.58693849
## PM_5yrPreSamp   -0.01385524    0.04060739
## sexFemale       -0.07038627    0.11450638
## (phi)         1917.12407737 2369.42710691

Impact of age_dx on model

dnam_model1 <- betareg(prop_5mC ~ PM_5yrPreSamp + age_dx, dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ PM_5yrPreSamp + age_dx, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1140 -0.3988 -0.0680  0.2808  5.3697 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.715667   0.213437 -31.464   <2e-16 ***
## PM_5yrPreSamp  0.013071   0.013889   0.941    0.347    
## age_dx        -0.001377   0.002589  -0.532    0.595    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2143.5      115.4   18.57   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4401 on 4 Df
## Pseudo R-squared: 0.003644
## Number of iterations: 515 (BFGS) + 4 (Fisher scoring)
confint(dnam_model1)
##                       2.5 %        97.5 %
## (Intercept)   -7.133996e+00 -6.297338e+00
## PM_5yrPreSamp -1.415025e-02  4.029250e-02
## age_dx        -6.450539e-03  3.697292e-03
## (phi)          1.917298e+03  2.369640e+03

Impact of smokeHx on model

dnam_model1 <- betareg(prop_5mC ~ PM_5yrPreSamp + smokeHx, dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ PM_5yrPreSamp + smokeHx, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1297 -0.4123 -0.0766  0.2778  5.3355 
## 
## Coefficients (mean model with logit link):
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.79638    0.12019 -56.547   <2e-16 ***
## PM_5yrPreSamp  0.01321    0.01389   0.951    0.342    
## smokeHxEver   -0.02331    0.04272  -0.546    0.585    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2143.5      115.4   18.57   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4401 on 4 Df
## Pseudo R-squared: 0.003651
## Number of iterations: 474 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
##                       2.5 %        97.5 %
## (Intercept)     -7.03194766   -6.56081064
## PM_5yrPreSamp   -0.01401575    0.04043148
## smokeHxEver     -0.10704887    0.06042998
## (phi)         1917.34421122 2369.69747431

Impact of race on model

dnam_model1 <- betareg(prop_5mC ~ PM_5yrPreSamp + dich_Race, dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ PM_5yrPreSamp + dich_Race, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0941 -0.3969 -0.0721  0.2849  5.3444 
## 
## Coefficients (mean model with logit link):
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        -6.8108778  0.1178962 -57.770   <2e-16 ***
## PM_5yrPreSamp       0.0131611  0.0140862   0.934    0.350    
## dich_RaceNon-White -0.0007183  0.0919231  -0.008    0.994    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2142.6      115.4   18.57   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4401 on 4 Df
## Pseudo R-squared: 0.002778
## Number of iterations: 150 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
##                           2.5 %        97.5 %
## (Intercept)          -7.0419501   -6.57980537
## PM_5yrPreSamp        -0.0144473    0.04076952
## dich_RaceNon-White   -0.1808843    0.17944763
## (phi)              1916.5258892 2368.69253558

Impact of urbanicity on model

dnam_model1 <- betareg(prop_5mC ~ PM_5yrPreSamp + metro, dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ PM_5yrPreSamp + metro, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1020 -0.3935 -0.0688  0.2842  5.3570 
## 
## Coefficients (mean model with logit link):
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       -6.78465    0.12441 -54.533   <2e-16 ***
## PM_5yrPreSamp      0.01066    0.01447   0.737    0.461    
## metromicropolitan -0.03226    0.07607  -0.424    0.672    
## metrorural        -0.03060    0.08223  -0.372    0.710    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2142.5      115.4   18.56   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4395 on 5 Df
## Pseudo R-squared: 0.003558
## Number of iterations: 271 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
##                           2.5 %        97.5 %
## (Intercept)         -7.02850159   -6.54080491
## PM_5yrPreSamp       -0.01769165    0.03901512
## metromicropolitan   -0.18135830    0.11684721
## metrorural          -0.19177756    0.13056969
## (phi)             1916.24680205 2368.67427891

Partial model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ PM_5yrPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ PM_5yrPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1387 -0.4058 -0.0784  0.2810  5.3563 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.713922   0.217764 -30.831   <2e-16 ***
## PM_5yrPreSamp  0.013304   0.013896   0.957    0.338    
## sexFemale      0.017245   0.047671   0.362    0.718    
## age_dx        -0.001293   0.002598  -0.498    0.619    
## smokeHxEver   -0.021535   0.043021  -0.501    0.617    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2144.8      115.5   18.57   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4401 on 6 Df
## Pseudo R-squared: 0.004893
## Number of iterations: 225 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
##                       2.5 %        97.5 %
## (Intercept)   -7.140731e+00 -6.287113e+00
## PM_5yrPreSamp -1.393286e-02  4.054033e-02
## sexFemale     -7.618916e-02  1.106785e-01
## age_dx        -6.384218e-03  3.798731e-03
## smokeHxEver   -1.058546e-01  6.278466e-02
## (phi)          1.918478e+03  2.371090e+03

No significant association between PM_5yrPreSamp and prop_5mC in this model.

Complete model with age_dx, sex, smokeHx, race, urbanicity

dnam_model3 <- betareg(prop_5mC ~ PM_5yrPreSamp + sex + age_dx + smokeHx + dich_Race + metro, data=dnam)
summary(dnam_model3)
## 
## Call:
## betareg(formula = prop_5mC ~ PM_5yrPreSamp + sex + age_dx + smokeHx + 
##     dich_Race + metro, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1533 -0.4031 -0.0802  0.2751  5.3672 
## 
## Coefficients (mean model with logit link):
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        -6.692229   0.222963 -30.015   <2e-16 ***
## PM_5yrPreSamp       0.010820   0.014640   0.739    0.460    
## sexFemale           0.016874   0.047773   0.353    0.724    
## age_dx             -0.001199   0.002604  -0.460    0.645    
## smokeHxEver        -0.023908   0.043118  -0.554    0.579    
## dich_RaceNon-White -0.004606   0.092172  -0.050    0.960    
## metromicropolitan  -0.030009   0.076190  -0.394    0.694    
## metrorural         -0.035227   0.082435  -0.427    0.669    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2144.7      115.5   18.56   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4395 on 9 Df
## Pseudo R-squared: 0.005725
## Number of iterations: 156 (BFGS) + 3 (Fisher scoring)
confint(dnam_model3)
##                            2.5 %        97.5 %
## (Intercept)        -7.129228e+00 -6.255230e+00
## PM_5yrPreSamp      -1.787368e-02  3.951281e-02
## sexFemale          -7.675881e-02  1.105067e-01
## age_dx             -6.301724e-03  3.904336e-03
## smokeHxEver        -1.084168e-01  6.060122e-02
## dich_RaceNon-White -1.852603e-01  1.760482e-01
## metromicropolitan  -1.793379e-01  1.193196e-01
## metrorural         -1.967969e-01  1.263424e-01
## (phi)               1.918246e+03  2.371130e+03

No significant association between PM_5yrPreSamp and prop_5mC in this model.

17.1.1 Continuous PM2.5 1yr Pre-Sampling

dnam_model1 <- betareg(prop_5mC ~ PM_1yrPreSamp, dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ PM_1yrPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1109 -0.4071 -0.0763  0.2810  5.3473 
## 
## Coefficients (mean model with logit link):
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.79952    0.10347 -65.712   <2e-16 ***
## PM_1yrPreSamp  0.01289    0.01336   0.965    0.335    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2142.7      115.4   18.57   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4401 on 3 Df
## Pseudo R-squared: 0.002929
## Number of iterations: 5000 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
##                      2.5 %       97.5 %
## (Intercept)     -7.0023218   -6.5967111
## PM_1yrPreSamp   -0.0133012    0.0390802
## (phi)         1916.6356029 2368.8271574

Impact of sex on model

dnam_model1 <- betareg(prop_5mC ~ PM_1yrPreSamp + sex, dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ PM_1yrPreSamp + sex, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0982 -0.3999 -0.0738  0.2811  5.3410 
## 
## Coefficients (mean model with logit link):
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.80760    0.10479 -64.961   <2e-16 ***
## PM_1yrPreSamp  0.01320    0.01338   0.987    0.324    
## sexFemale      0.02266    0.04719   0.480    0.631    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2143.4      115.4   18.57   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4401 on 4 Df
## Pseudo R-squared: 0.003613
## Number of iterations: 285 (BFGS) + 4 (Fisher scoring)
confint(dnam_model1)
##                       2.5 %        97.5 %
## (Intercept)     -7.01299567   -6.60220831
## PM_1yrPreSamp   -0.01301943    0.03942261
## sexFemale       -0.06983010    0.11514772
## (phi)         1917.26631533 2369.60167981

Impact of age_dx on model

dnam_model1 <- betareg(prop_5mC ~ PM_1yrPreSamp + age_dx, dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ PM_1yrPreSamp + age_dx, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1333 -0.4007 -0.0732  0.2817  5.3724 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.705407   0.207456 -32.322   <2e-16 ***
## PM_1yrPreSamp  0.012769   0.013366   0.955    0.339    
## age_dx        -0.001358   0.002589  -0.524    0.600    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2143.6      115.4   18.57   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4401 on 4 Df
## Pseudo R-squared: 0.003753
## Number of iterations: 357 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
##                       2.5 %        97.5 %
## (Intercept)   -7.112013e+00 -6.298801e+00
## PM_1yrPreSamp -1.342809e-02  3.896698e-02
## age_dx        -6.432495e-03  3.717159e-03
## (phi)          1.917389e+03  2.369752e+03

Impact of smokeHx on model

dnam_model1 <- betareg(prop_5mC ~ PM_1yrPreSamp + smokeHx, dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ PM_1yrPreSamp + smokeHx, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1493 -0.4087 -0.0810  0.2831  5.3387 
## 
## Coefficients (mean model with logit link):
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.78488    0.10702 -63.397   <2e-16 ***
## PM_1yrPreSamp  0.01291    0.01336   0.966    0.334    
## smokeHxEver   -0.02308    0.04273  -0.540    0.589    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2143.6      115.4   18.57   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4401 on 4 Df
## Pseudo R-squared: 0.003806
## Number of iterations: 274 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
##                       2.5 %        97.5 %
## (Intercept)     -6.99464424   -6.57512435
## PM_1yrPreSamp   -0.01328436    0.03910143
## smokeHxEver     -0.10681696    0.06066362
## (phi)         1917.43483409 2369.80859211

Impact of race on model

dnam_model1 <- betareg(prop_5mC ~ PM_1yrPreSamp + dich_Race, dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ PM_1yrPreSamp + dich_Race, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1111 -0.4072 -0.0763  0.2810  5.3474 
## 
## Coefficients (mean model with logit link):
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        -6.7995895  0.1041294 -65.299   <2e-16 ***
## PM_1yrPreSamp       0.0129037  0.0135434   0.953    0.341    
## dich_RaceNon-White -0.0006357  0.0918550  -0.007    0.994    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2142.7      115.4   18.57   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4401 on 4 Df
## Pseudo R-squared: 0.00293
## Number of iterations: 177 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
##                            2.5 %        97.5 %
## (Intercept)          -7.00367933   -6.59549963
## PM_1yrPreSamp        -0.01364075    0.03944825
## dich_RaceNon-White   -0.18066813    0.17939669
## (phi)              1916.63568858 2368.82726247

Impact of urbanicity on model

dnam_model1 <- betareg(prop_5mC ~ PM_1yrPreSamp + metro, dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ PM_1yrPreSamp + metro, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1177 -0.3922 -0.0746  0.2870  5.3590 
## 
## Coefficients (mean model with logit link):
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       -6.77512    0.11059 -61.265   <2e-16 ***
## PM_1yrPreSamp      0.01039    0.01395   0.745    0.456    
## metromicropolitan -0.03171    0.07618  -0.416    0.677    
## metrorural        -0.03040    0.08224  -0.370    0.712    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2142.5      115.4   18.56   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4395 on 5 Df
## Pseudo R-squared: 0.003622
## Number of iterations: 656 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
##                           2.5 %        97.5 %
## (Intercept)         -6.99187109   -6.55837723
## PM_1yrPreSamp       -0.01694166    0.03772986
## metromicropolitan   -0.18101946    0.11759671
## metrorural          -0.19159631    0.13079369
## (phi)             1916.28876554 2368.72575737

Partial model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ PM_1yrPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ PM_1yrPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1577 -0.3981 -0.0836  0.2781  5.3590 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.704267   0.212068 -31.614   <2e-16 ***
## PM_1yrPreSamp  0.013032   0.013384   0.974    0.330    
## sexFemale      0.017901   0.047697   0.375    0.707    
## age_dx        -0.001271   0.002598  -0.489    0.625    
## smokeHxEver   -0.021241   0.043021  -0.494    0.621    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2144.9      115.5   18.57   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4402 on 6 Df
## Pseudo R-squared: 0.005034
## Number of iterations: 333 (BFGS) + 4 (Fisher scoring)
confint(dnam_model2)
##                       2.5 %        97.5 %
## (Intercept)   -7.119913e+00 -6.288621e+00
## PM_1yrPreSamp -1.320018e-02  3.926347e-02
## sexFemale     -7.558237e-02  1.113852e-01
## age_dx        -6.363744e-03  3.821857e-03
## smokeHxEver   -1.055617e-01  6.307937e-02
## (phi)          1.918578e+03  2.371213e+03

No significant association between PM_1yrPreSamp and prop_5mC in this model.

Complete model with age_dx, sex, smokeHx, race, urbanicity

dnam_model3 <- betareg(prop_5mC ~ PM_1yrPreSamp + sex + age_dx + smokeHx + dich_Race + metro, data=dnam)
summary(dnam_model3)
## 
## Call:
## betareg(formula = prop_5mC ~ PM_1yrPreSamp + sex + age_dx + smokeHx + 
##     dich_Race + metro, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1685 -0.3993 -0.0758  0.2756  5.3692 
## 
## Coefficients (mean model with logit link):
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        -6.683713   0.216686 -30.845   <2e-16 ***
## PM_1yrPreSamp       0.010522   0.014118   0.745    0.456    
## sexFemale           0.017410   0.047792   0.364    0.716    
## age_dx             -0.001184   0.002605  -0.454    0.650    
## smokeHxEver        -0.023639   0.043119  -0.548    0.584    
## dich_RaceNon-White -0.004422   0.092118  -0.048    0.962    
## metromicropolitan  -0.029512   0.076288  -0.387    0.699    
## metrorural         -0.035065   0.082441  -0.425    0.671    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2144.7      115.5   18.56   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4395 on 9 Df
## Pseudo R-squared: 0.005782
## Number of iterations: 133 (BFGS) + 2 (Fisher scoring)
confint(dnam_model3)
##                            2.5 %        97.5 %
## (Intercept)        -7.108411e+00 -6.259016e+00
## PM_1yrPreSamp      -1.714769e-02  3.819216e-02
## sexFemale          -7.626092e-02  1.110812e-01
## age_dx             -6.288497e-03  3.921366e-03
## smokeHxEver        -1.081502e-01  6.087158e-02
## dich_RaceNon-White -1.849689e-01  1.761255e-01
## metromicropolitan  -1.790345e-01  1.200097e-01
## metrorural         -1.966460e-01  1.265169e-01
## (phi)               1.918279e+03  2.371170e+03

No significant association between PM_1yrPreSamp and prop_5mC in this model.

17.1.2 Continuous PM2.5 5yr Pre-Sample

dnam_model1 <- betareg(prop_5mC ~ PM_5yrPreSamp, dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ PM_5yrPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0934 -0.3967 -0.0721  0.2850  5.3443 
## 
## Coefficients (mean model with logit link):
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.81077    0.11701 -58.209   <2e-16 ***
## PM_5yrPreSamp  0.01314    0.01389   0.946    0.344    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2142.6      115.4   18.57   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4401 on 3 Df
## Pseudo R-squared: 0.002777
## Number of iterations: 215 (BFGS) + 5 (Fisher scoring)
confint(dnam_model1)
##                       2.5 %        97.5 %
## (Intercept)     -7.04010171   -6.58144546
## PM_5yrPreSamp   -0.01407672    0.04036428
## (phi)         1916.52576149 2368.69237872

No significant association between PM_5yrPreSamp and prop_5mC in this model.

Partial model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ PM_5yrPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ PM_5yrPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1387 -0.4058 -0.0784  0.2810  5.3563 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.713922   0.217764 -30.831   <2e-16 ***
## PM_5yrPreSamp  0.013304   0.013896   0.957    0.338    
## sexFemale      0.017245   0.047671   0.362    0.718    
## age_dx        -0.001293   0.002598  -0.498    0.619    
## smokeHxEver   -0.021535   0.043021  -0.501    0.617    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2144.8      115.5   18.57   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4401 on 6 Df
## Pseudo R-squared: 0.004893
## Number of iterations: 225 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
##                       2.5 %        97.5 %
## (Intercept)   -7.140731e+00 -6.287113e+00
## PM_5yrPreSamp -1.393286e-02  4.054033e-02
## sexFemale     -7.618916e-02  1.106785e-01
## age_dx        -6.384218e-03  3.798731e-03
## smokeHxEver   -1.058546e-01  6.278466e-02
## (phi)          1.918478e+03  2.371090e+03

No significant association between PM_5yrPreSamp and prop_5mC in this model.

Complete model with age_dx, sex, smokeHx, race, urbanicity

dnam_model3 <- betareg(prop_5mC ~ PM_5yrPreSamp + sex + age_dx + smokeHx + dich_Race + metro, data=dnam)
summary(dnam_model3)
## 
## Call:
## betareg(formula = prop_5mC ~ PM_5yrPreSamp + sex + age_dx + smokeHx + 
##     dich_Race + metro, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1533 -0.4031 -0.0802  0.2751  5.3672 
## 
## Coefficients (mean model with logit link):
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        -6.692229   0.222963 -30.015   <2e-16 ***
## PM_5yrPreSamp       0.010820   0.014640   0.739    0.460    
## sexFemale           0.016874   0.047773   0.353    0.724    
## age_dx             -0.001199   0.002604  -0.460    0.645    
## smokeHxEver        -0.023908   0.043118  -0.554    0.579    
## dich_RaceNon-White -0.004606   0.092172  -0.050    0.960    
## metromicropolitan  -0.030009   0.076190  -0.394    0.694    
## metrorural         -0.035227   0.082435  -0.427    0.669    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2144.7      115.5   18.56   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4395 on 9 Df
## Pseudo R-squared: 0.005725
## Number of iterations: 156 (BFGS) + 3 (Fisher scoring)
confint(dnam_model3)
##                            2.5 %        97.5 %
## (Intercept)        -7.129228e+00 -6.255230e+00
## PM_5yrPreSamp      -1.787368e-02  3.951281e-02
## sexFemale          -7.675881e-02  1.105067e-01
## age_dx             -6.301724e-03  3.904336e-03
## smokeHxEver        -1.084168e-01  6.060122e-02
## dich_RaceNon-White -1.852603e-01  1.760482e-01
## metromicropolitan  -1.793379e-01  1.193196e-01
## metrorural         -1.967969e-01  1.263424e-01
## (phi)               1.918246e+03  2.371130e+03

No significant association between PM_5yrPreSamp and prop_5mC in this model.

17.1.3 Continuous PM2.5 6mo Pre-Sample

dnam_model1 <- betareg(prop_5mC ~ PM_6moPreSamp, dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ PM_6moPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0820 -0.3952 -0.0748  0.2728  5.3638 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.759250   0.090423 -74.751   <2e-16 ***
## PM_6moPreSamp  0.006491   0.011520   0.563    0.573    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2157.1      117.8   18.31   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4282 on 3 Df
## Pseudo R-squared: 0.001044
## Number of iterations: 2160 (BFGS) + 4 (Fisher scoring)
confint(dnam_model1)
##                       2.5 %       97.5 %
## (Intercept)     -6.93647652   -6.5820237
## PM_6moPreSamp   -0.01608737    0.0290689
## (phi)         1926.21409828 2388.0122850

No significant association between PM_6moPreSamp and prop_5mC in this model.

Partial model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ PM_6moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ PM_6moPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1575 -0.3876 -0.0801  0.2805  5.3788 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.646045   0.205864 -32.284   <2e-16 ***
## PM_6moPreSamp  0.006454   0.011550   0.559    0.576    
## sexFemale      0.006335   0.048713   0.130    0.897    
## age_dx        -0.001412   0.002618  -0.539    0.590    
## smokeHxEver   -0.027536   0.043564  -0.632    0.527    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2159.5      117.9   18.31   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4283 on 6 Df
## Pseudo R-squared: 0.003397
## Number of iterations: 86 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
##                       2.5 %        97.5 %
## (Intercept)   -7.049531e+00 -6.242558e+00
## PM_6moPreSamp -1.618385e-02  2.909232e-02
## sexFemale     -8.914068e-02  1.018113e-01
## age_dx        -6.543327e-03  3.719134e-03
## smokeHxEver   -1.129206e-01  5.784837e-02
## (phi)          1.928313e+03  2.390597e+03

No significant association between PM_6moPreSamp and prop_5mC in this model.

Complete model with age_dx, sex, smokeHx, race, urbanicity

dnam_model3 <- betareg(prop_5mC ~ PM_6moPreSamp + sex + age_dx + smokeHx + dich_Race + metro, data=dnam)
summary(dnam_model3)
## 
## Call:
## betareg(formula = prop_5mC ~ PM_6moPreSamp + sex + age_dx + smokeHx + 
##     dich_Race + metro, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1572 -0.3906 -0.0763  0.2773  5.3877 
## 
## Coefficients (mean model with logit link):
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        -6.629700   0.208498 -31.797   <2e-16 ***
## PM_6moPreSamp       0.004332   0.012006   0.361    0.718    
## sexFemale           0.005841   0.048819   0.120    0.905    
## age_dx             -0.001314   0.002624  -0.501    0.616    
## smokeHxEver        -0.029603   0.043674  -0.678    0.498    
## dich_RaceNon-White  0.008271   0.091605   0.090    0.928    
## metromicropolitan  -0.032440   0.077340  -0.419    0.675    
## metrorural         -0.034537   0.082502  -0.419    0.675    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)     2160        118    18.3   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4276 on 9 Df
## Pseudo R-squared: 0.004416
## Number of iterations: 158 (BFGS) + 4 (Fisher scoring)
confint(dnam_model3)
##                            2.5 %        97.5 %
## (Intercept)          -7.03834815 -6.221051e+00
## PM_6moPreSamp        -0.01919819  2.786296e-02
## sexFemale            -0.08984261  1.015238e-01
## age_dx               -0.00645720  3.828396e-03
## smokeHxEver          -0.11520271  5.599681e-02
## dich_RaceNon-White   -0.17127119  1.878129e-01
## metromicropolitan    -0.18402242  1.191431e-01
## metrorural           -0.19623758  1.271636e-01
## (phi)              1928.20685526  2.390809e+03

No significant association between PM_6moPreSamp and prop_5mC in this model.

17.1.4 Continuous PM2.5 3mo Pre-Sample

dnam_model1 <- betareg(prop_5mC ~ PM_3moPreSamp, dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ PM_3moPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0748 -0.3977 -0.0704  0.2855  5.3533 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.745942   0.081576 -82.695   <2e-16 ***
## PM_3moPreSamp  0.005387   0.010406   0.518    0.605    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2146.3      116.4   18.44   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4338 on 3 Df
## Pseudo R-squared: 0.0008507
## Number of iterations: 879 (BFGS) + 2 (Fisher scoring)
confint(dnam_model1)
##                       2.5 %        97.5 %
## (Intercept)     -6.90582786   -6.58605623
## PM_3moPreSamp   -0.01500861    0.02578313
## (phi)         1918.09570173 2374.43953470

No significant association between PM_3moPreSamp and prop_5mC in this model.

Partial model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ PM_3moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ PM_3moPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1420 -0.3905 -0.0779  0.2829  5.3678 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.635689   0.204009 -32.526   <2e-16 ***
## PM_3moPreSamp  0.005204   0.010431   0.499    0.618    
## sexFemale      0.003833   0.048501   0.079    0.937    
## age_dx        -0.001333   0.002616  -0.510    0.610    
## smokeHxEver   -0.028777   0.043304  -0.665    0.506    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2148.5      116.5   18.44   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4338 on 6 Df
## Pseudo R-squared: 0.003115
## Number of iterations: 86 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
##                       2.5 %        97.5 %
## (Intercept)   -7.035540e+00 -6.235837e+00
## PM_3moPreSamp -1.523952e-02  2.564756e-02
## sexFemale     -9.122724e-02  9.889350e-02
## age_dx        -6.461128e-03  3.794611e-03
## smokeHxEver   -1.136512e-01  5.609780e-02
## (phi)          1.920129e+03  2.376940e+03

No significant association between PM_3moPreSamp and prop_5mC in this model.

Complete model with age_dx, sex, smokeHx, race, urbanicity

dnam_model3 <- betareg(prop_5mC ~ PM_3moPreSamp + sex + age_dx + smokeHx + dich_Race + metro, data=dnam)
summary(dnam_model3)
## 
## Call:
## betareg(formula = prop_5mC ~ PM_3moPreSamp + sex + age_dx + smokeHx + 
##     dich_Race + metro, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1591 -0.3939 -0.0642  0.2837  5.3792 
## 
## Coefficients (mean model with logit link):
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        -6.620824   0.205968 -32.145   <2e-16 ***
## PM_3moPreSamp       0.003726   0.010644   0.350    0.726    
## sexFemale           0.003528   0.048581   0.073    0.942    
## age_dx             -0.001276   0.002622  -0.487    0.627    
## smokeHxEver        -0.031045   0.043403  -0.715    0.474    
## dich_RaceNon-White  0.005997   0.091259   0.066    0.948    
## metromicropolitan  -0.032043   0.076177  -0.421    0.674    
## metrorural         -0.042457   0.081949  -0.518    0.604    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2149.0      116.6   18.43   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4332 on 9 Df
## Pseudo R-squared: 0.004439
## Number of iterations: 144 (BFGS) + 4 (Fisher scoring)
confint(dnam_model3)
##                            2.5 %        97.5 %
## (Intercept)        -7.024514e+00 -6.217135e+00
## PM_3moPreSamp      -1.713528e-02  2.458680e-02
## sexFemale          -9.168874e-02  9.874514e-02
## age_dx             -6.414744e-03  3.862837e-03
## smokeHxEver        -1.161136e-01  5.402423e-02
## dich_RaceNon-White -1.728670e-01  1.848611e-01
## metromicropolitan  -1.813470e-01  1.172606e-01
## metrorural         -2.030742e-01  1.181609e-01
## (phi)               1.920354e+03  2.377551e+03

No significant association between PM_3moPreSamp and prop_5mC in this model.

17.1.5 Continuous PM2.5 1mo Pre-Sample

dnam_model1 <- betareg(prop_5mC ~ PM_1moPreSamp, dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ PM_1moPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0726 -0.3971 -0.0639  0.2809  5.3385 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.722037   0.076019 -88.426   <2e-16 ***
## PM_1moPreSamp  0.002840   0.009623   0.295    0.768    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2127.3      114.9   18.51   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4369 on 3 Df
## Pseudo R-squared: 0.0002713
## Number of iterations: 613 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
##                      2.5 %        97.5 %
## (Intercept)     -6.8710315   -6.57304347
## PM_1moPreSamp   -0.0160201    0.02170076
## (phi)         1901.9886325 2352.53440447

No significant association between PM_1moPreSamp and prop_5mC in this model.

Partial model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ PM_1moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ PM_1moPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1189 -0.3941 -0.0776  0.2718  5.3498 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.631128   0.202248 -32.787   <2e-16 ***
## PM_1moPreSamp  0.002846   0.009644   0.295    0.768    
## sexFemale      0.015608   0.047845   0.326    0.744    
## age_dx        -0.001180   0.002620  -0.450    0.652    
## smokeHxEver   -0.021775   0.043234  -0.504    0.615    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)     2129        115   18.51   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4369 on 6 Df
## Pseudo R-squared: 0.002195
## Number of iterations: 254 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
##                       2.5 %        97.5 %
## (Intercept)   -7.027527e+00 -6.234729e+00
## PM_1moPreSamp -1.605698e-02  2.174809e-02
## sexFemale     -7.816577e-02  1.093825e-01
## age_dx        -6.314977e-03  3.954973e-03
## smokeHxEver   -1.065119e-01  6.296159e-02
## (phi)          1.903729e+03  2.354673e+03

No significant association between PM_1moPreSamp and prop_5mC in this model.

Complete model with age_dx, sex, smokeHx, race, urbanicity

dnam_model3 <- betareg(prop_5mC ~ PM_1moPreSamp + sex + age_dx + smokeHx + dich_Race + metro, data=dnam)
summary(dnam_model3)
## 
## Call:
## betareg(formula = prop_5mC ~ PM_1moPreSamp + sex + age_dx + smokeHx + 
##     dich_Race + metro, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1399 -0.3904 -0.0749  0.2745  5.3622 
## 
## Coefficients (mean model with logit link):
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        -6.619314   0.203572 -32.516   <2e-16 ***
## PM_1moPreSamp       0.001641   0.009784   0.168    0.867    
## sexFemale           0.015602   0.047948   0.325    0.745    
## age_dx             -0.001091   0.002625  -0.416    0.678    
## smokeHxEver        -0.024175   0.043324  -0.558    0.577    
## dich_RaceNon-White  0.003114   0.091452   0.034    0.973    
## metromicropolitan  -0.039626   0.075655  -0.524    0.600    
## metrorural         -0.047253   0.081853  -0.577    0.564    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2130.1      115.2    18.5   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4363 on 9 Df
## Pseudo R-squared: 0.003964
## Number of iterations: 153 (BFGS) + 3 (Fisher scoring)
confint(dnam_model3)
##                            2.5 %        97.5 %
## (Intercept)        -7.018307e+00 -6.220321e+00
## PM_1moPreSamp      -1.753538e-02  2.081713e-02
## sexFemale          -7.837399e-02  1.095773e-01
## age_dx             -6.236622e-03  4.053837e-03
## smokeHxEver        -1.090879e-01  6.073764e-02
## dich_RaceNon-White -1.761295e-01  1.823570e-01
## metromicropolitan  -1.879074e-01  1.086562e-01
## metrorural         -2.076825e-01  1.131756e-01
## (phi)               1.904349e+03  2.355761e+03

No significant association between PM_1moPreSamp and prop_5mC in this model.

17.2 SO4 Models

17.2.1 Continuous SO4 in 5yrs Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ SO4_5yrPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ SO4_5yrPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0892 -0.3952 -0.0703  0.2762  5.3447 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -6.76638    0.07432 -91.045   <2e-16 ***
## SO4_5yrPreSamp  0.04424    0.04876   0.907    0.364    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2142.4      115.3   18.57   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4401 on 3 Df
## Pseudo R-squared: 0.002578
## Number of iterations: 5000 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
##                        2.5 %       97.5 %
## (Intercept)      -6.91204137   -6.6207150
## SO4_5yrPreSamp   -0.05132734    0.1398149
## (phi)          1916.34227006 2368.4671320

No significant association between SO4_5yrPreSamp and prop_5mC in this model.

Adjusted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ SO4_5yrPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ SO4_5yrPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1342 -0.4027 -0.0757  0.2774  5.3580 
## 
## Coefficients (mean model with logit link):
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -6.668598   0.199047 -33.503   <2e-16 ***
## SO4_5yrPreSamp  0.042210   0.048809   0.865    0.387    
## sexFemale       0.015039   0.047654   0.316    0.752    
## age_dx         -0.001249   0.002599  -0.480    0.631    
## smokeHxEver    -0.020191   0.043070  -0.469    0.639    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2144.3      115.4   18.57   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4401 on 6 Df
## Pseudo R-squared: 0.004426
## Number of iterations: 365 (BFGS) + 4 (Fisher scoring)
confint(dnam_model2)
##                        2.5 %        97.5 %
## (Intercept)    -7.058722e+00   -6.27847285
## SO4_5yrPreSamp -5.345490e-02    0.13787442
## sexFemale      -7.836102e-02    0.10843943
## age_dx         -6.342786e-03    0.00384573
## smokeHxEver    -1.046065e-01    0.06422448
## (phi)           1.918030e+03 2370.54001805

No significant association between SO4_5yrPreSamp and prop_5mC in this model.

17.2.2 Continuous SO4 in 1yrs Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ SO4_1yrPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ SO4_1yrPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1380 -0.3999 -0.0628  0.2688  5.3355 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -6.79894    0.07751 -87.713   <2e-16 ***
## SO4_1yrPreSamp  0.08866    0.06714   1.321    0.187    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2135.5      115.3   18.52   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4376 on 3 Df
## Pseudo R-squared: 0.005371
## Number of iterations: 1068 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
##                        2.5 %       97.5 %
## (Intercept)      -6.95086658   -6.6470194
## SO4_1yrPreSamp   -0.04293048    0.2202559
## (phi)          1909.48632079 2361.4149580

No significant association between SO4_1yrPreSamp and prop_5mC in this model.

Adjusted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ SO4_1yrPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ SO4_1yrPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1842 -0.3928 -0.0668  0.2749  5.3485 
## 
## Coefficients (mean model with logit link):
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -6.700623   0.198852 -33.696   <2e-16 ***
## SO4_1yrPreSamp  0.087698   0.067163   1.306    0.192    
## sexFemale       0.015546   0.047828   0.325    0.745    
## age_dx         -0.001285   0.002602  -0.494    0.621    
## smokeHxEver    -0.020307   0.043143  -0.471    0.638    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2137.4      115.4   18.52   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4377 on 6 Df
## Pseudo R-squared: 0.00734
## Number of iterations: 274 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
##                        2.5 %        97.5 %
## (Intercept)    -7.090366e+00 -6.310879e+00
## SO4_1yrPreSamp -4.393938e-02  2.193344e-01
## sexFemale      -7.819503e-02  1.092870e-01
## age_dx         -6.384944e-03  3.814719e-03
## smokeHxEver    -1.048662e-01  6.425219e-02
## (phi)           1.911242e+03  2.363572e+03

No significant association between SO4_1yrPreSamp and prop_5mC in this model.

17.2.3 Continuous SO4 in 6mo Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ SO4_6moPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ SO4_6moPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0987 -0.3929 -0.0751  0.2734  5.3065 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -6.76915    0.07342 -92.198   <2e-16 ***
## SO4_6moPreSamp  0.06060    0.06505   0.932    0.352    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2112.6      116.9   18.07   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4168 on 3 Df
## Pseudo R-squared: 0.003063
## Number of iterations: 5000 (BFGS) + 4 (Fisher scoring)
confint(dnam_model1)
##                        2.5 %       97.5 %
## (Intercept)      -6.91304860   -6.6252482
## SO4_6moPreSamp   -0.06690139    0.1880926
## (phi)          1883.39659609 2341.7071950

No significant association between SO4_6moPreSamp and prop_5mC in this model.

Adjusted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ SO4_6moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ SO4_6moPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1645 -0.3944 -0.0696  0.2734  5.3213 
## 
## Coefficients (mean model with logit link):
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -6.659102   0.199519 -33.376   <2e-16 ***
## SO4_6moPreSamp  0.060058   0.065067   0.923    0.356    
## sexFemale       0.003989   0.049412   0.081    0.936    
## age_dx         -0.001354   0.002645  -0.512    0.609    
## smokeHxEver    -0.027577   0.044334  -0.622    0.534    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)     2115        117   18.07   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4169 on 6 Df
## Pseudo R-squared: 0.005279
## Number of iterations: 307 (BFGS) + 4 (Fisher scoring)
confint(dnam_model2)
##                        2.5 %        97.5 %
## (Intercept)    -7.050152e+00 -6.268051e+00
## SO4_6moPreSamp -6.747117e-02  1.875866e-01
## sexFemale      -9.285614e-02  1.008334e-01
## age_dx         -6.537529e-03  3.828911e-03
## smokeHxEver    -1.144696e-01  5.931470e-02
## (phi)           1.885328e+03  2.344093e+03

No significant association between SO4_6moPreSamp and prop_5mC in this model.

17.2.4 Continuous SO4 in 3mo Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ SO4_3moPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ SO4_3moPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0862 -0.3801 -0.0591  0.2660  5.0351 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -6.68446    0.07473 -89.453   <2e-16 ***
## SO4_3moPreSamp  0.03003    0.06580   0.456    0.648    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   1866.1      110.5   16.89   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  3619 on 3 Df
## Pseudo R-squared: 0.0008348
## Number of iterations: 1705 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
##                        2.5 %      97.5 %
## (Intercept)      -6.83092254   -6.538001
## SO4_3moPreSamp   -0.09894427    0.159000
## (phi)          1649.49370731 2082.620766

No significant association between SO4_3moPreSamp and prop_5mC in this model.

Adjusted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ SO4_3moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ SO4_3moPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1627 -0.3992 -0.0586  0.2692  5.0393 
## 
## Coefficients (mean model with logit link):
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -6.6197749  0.2124773 -31.155   <2e-16 ***
## SO4_3moPreSamp  0.0306121  0.0658279   0.465    0.642    
## sexFemale      -0.0094719  0.0543310  -0.174    0.862    
## age_dx         -0.0005727  0.0028849  -0.199    0.843    
## smokeHxEver    -0.0374484  0.0488809  -0.766    0.444    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   1868.1      110.6   16.89   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  3619 on 6 Df
## Pseudo R-squared: 0.003239
## Number of iterations: 305 (BFGS) + 4 (Fisher scoring)
confint(dnam_model2)
##                        2.5 %        97.5 %
## (Intercept)    -7.036223e+00   -6.20332715
## SO4_3moPreSamp -9.840814e-02    0.15963240
## sexFemale      -1.159586e-01    0.09701486
## age_dx         -6.226886e-03    0.00508153
## smokeHxEver    -1.332533e-01    0.05835647
## (phi)           1.651310e+03 2084.89644145

No significant association between SO4_3moPreSamp and prop_5mC in this model.

17.2.5 Continuous SO4 in 1mo Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ SO4_1moPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ SO4_1moPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0340 -0.3976 -0.0658  0.2728  4.9454 
## 
## Coefficients (mean model with logit link):
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -6.646757   0.075024 -88.595   <2e-16 ***
## SO4_1moPreSamp  0.009376   0.066090   0.142    0.887    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   1787.7      107.6   16.62   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  3499 on 3 Df
## Pseudo R-squared: 8.369e-05
## Number of iterations: 465 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
##                       2.5 %      97.5 %
## (Intercept)      -6.7938014   -6.499712
## SO4_1moPreSamp   -0.1201581    0.138911
## (phi)          1576.8984016 1998.570166

No significant association between SO4_1moPreSamp and prop_5mC in this model.

Adjusted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ SO4_1moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ SO4_1moPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0669 -0.4024 -0.0671  0.2745  4.9472 
## 
## Coefficients (mean model with logit link):
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -6.5876885  0.2175829 -30.277   <2e-16 ***
## SO4_1moPreSamp  0.0099026  0.0661027   0.150    0.881    
## sexFemale      -0.0007889  0.0552074  -0.014    0.989    
## age_dx         -0.0005254  0.0029701  -0.177    0.860    
## smokeHxEver    -0.0365086  0.0504846  -0.723    0.470    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   1789.6      107.7   16.62   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  3499 on 6 Df
## Pseudo R-squared: 0.002302
## Number of iterations: 216 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
##                        2.5 %        97.5 %
## (Intercept)    -7.014143e+00 -6.161234e+00
## SO4_1moPreSamp -1.196563e-01  1.394616e-01
## sexFemale      -1.089934e-01  1.074156e-01
## age_dx         -6.346717e-03  5.296005e-03
## smokeHxEver    -1.354567e-01  6.243944e-02
## (phi)           1.578524e+03  2.000615e+03

No significant association between SO4_1moPreSamp and prop_5mC in this model.

17.3 NO3 Models

17.3.1 Continuous NO3 in 5yrs Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ NO3_5yrPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ NO3_5yrPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.3435 -0.4032 -0.0508  0.2943  5.4147 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)    -6.76826    0.04216 -160.545   <2e-16 ***
## NO3_5yrPreSamp  0.07214    0.03853    1.872   0.0612 .  
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2150.6      115.8   18.58   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4402 on 3 Df
## Pseudo R-squared: 0.01097
## Number of iterations: 5000 (BFGS) + 5 (Fisher scoring)
confint(dnam_model1)
##                        2.5 %       97.5 %
## (Intercept)    -6.850886e+00   -6.6856298
## NO3_5yrPreSamp -3.376669e-03    0.1476661
## (phi)           1.923705e+03 2377.5067887

Nitrate is marginally associated with higher global DNAm in this base model.

Adjuted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ NO3_5yrPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ NO3_5yrPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.3222 -0.3964 -0.0593  0.2963  5.4232 
## 
## Coefficients (mean model with logit link):
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -6.686531   0.188612 -35.451   <2e-16 ***
## NO3_5yrPreSamp  0.071439   0.038593   1.851   0.0642 .  
## sexFemale       0.017015   0.047579   0.358   0.7206    
## age_dx         -0.001049   0.002601  -0.403   0.6868    
## smokeHxEver    -0.020931   0.042964  -0.487   0.6261    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2152.5      115.9   18.58   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4403 on 6 Df
## Pseudo R-squared: 0.01275
## Number of iterations: 373 (BFGS) + 4 (Fisher scoring)
confint(dnam_model2)
##                        2.5 %        97.5 %
## (Intercept)    -7.056204e+00 -6.316859e+00
## NO3_5yrPreSamp -4.200898e-03  1.470796e-01
## sexFemale      -7.623892e-02  1.102688e-01
## age_dx         -6.145934e-03  4.048761e-03
## smokeHxEver    -1.051387e-01  6.327709e-02
## (phi)           1.925364e+03  2.379544e+03

Nitrate is marginally associated with higher global DNAm in this base model.

17.3.2 Continuous NO3 in 1yrs Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ NO3_1yrPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ NO3_1yrPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.4427 -0.4082 -0.0509  0.2934  5.4263 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)    -6.77738    0.04117 -164.611   <2e-16 ***
## NO3_1yrPreSamp  0.10093    0.04508    2.239   0.0252 *  
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2145.5      115.8   18.52   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4378 on 3 Df
## Pseudo R-squared: 0.01595
## Number of iterations: 1089 (BFGS) + 4 (Fisher scoring)
confint(dnam_model1)
##                        2.5 %       97.5 %
## (Intercept)      -6.85807589   -6.6966847
## NO3_1yrPreSamp    0.01257504    0.1892856
## (phi)          1918.48789591 2372.4708814

Higher nitrate is associated with higher global DNAm in this model.

Adjusted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ NO3_1yrPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ NO3_1yrPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.4199 -0.3957 -0.0516  0.2941  5.4336 
## 
## Coefficients (mean model with logit link):
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -6.7022647  0.1890792 -35.447   <2e-16 ***
## NO3_1yrPreSamp  0.1001976  0.0451771   2.218   0.0266 *  
## sexFemale       0.0184402  0.0477476   0.386   0.6993    
## age_dx         -0.0009692  0.0026074  -0.372   0.7101    
## smokeHxEver    -0.0198825  0.0430671  -0.462   0.6443    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2147.3      115.9   18.53   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4378 on 6 Df
## Pseudo R-squared: 0.01763
## Number of iterations: 501 (BFGS) + 5 (Fisher scoring)
confint(dnam_model2)
##                        2.5 %        97.5 %
## (Intercept)    -7.072853e+00 -6.331676e+00
## NO3_1yrPreSamp  1.165209e-02  1.887430e-01
## sexFemale      -7.514352e-02  1.120238e-01
## age_dx         -6.079582e-03  4.141149e-03
## smokeHxEver    -1.042924e-01  6.452748e-02
## (phi)           1.920082e+03  2.374430e+03

Higher nitrate is associated with higher global DNAm in this model.

17.3.3 Continuous NO3 in 6mo Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ NO3_6moPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ NO3_6moPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1621 -0.3978 -0.0656  0.2902  5.3539 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)    -6.73425    0.03496 -192.612   <2e-16 ***
## NO3_6moPreSamp  0.03681    0.03184    1.156    0.248    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)     2114        117   18.07   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4169 on 3 Df
## Pseudo R-squared: 0.004742
## Number of iterations: 269 (BFGS) + 5 (Fisher scoring)
confint(dnam_model1)
##                        2.5 %        97.5 %
## (Intercept)      -6.80277289   -6.66572146
## NO3_6moPreSamp   -0.02560227    0.09921783
## (phi)          1884.73539756 2343.35973604

No assoc of nitrate with global DNAm in this model.

Adjusted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ NO3_6moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ NO3_6moPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1943 -0.3965 -0.0704  0.2882  5.3698 
## 
## Coefficients (mean model with logit link):
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -6.623085   0.188999 -35.043   <2e-16 ***
## NO3_6moPreSamp  0.036051   0.031924   1.129    0.259    
## sexFemale       0.004530   0.049428   0.092    0.927    
## age_dx         -0.001394   0.002649  -0.526    0.599    
## smokeHxEver    -0.025160   0.044374  -0.567    0.571    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2116.0      117.1   18.07   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4169 on 6 Df
## Pseudo R-squared: 0.006826
## Number of iterations: 547 (BFGS) + 6 (Fisher scoring)
confint(dnam_model2)
##                        2.5 %        97.5 %
## (Intercept)    -6.993516e+00 -6.252654e+00
## NO3_6moPreSamp -2.651904e-02  9.862158e-02
## sexFemale      -9.234657e-02  1.014075e-01
## age_dx         -6.585967e-03  3.797384e-03
## smokeHxEver    -1.121313e-01  6.181226e-02
## (phi)           1.886525e+03  2.345570e+03

No assoc of nitrate with global DNAm in this model.

17.3.4 Continuous NO3 in 3mo Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ NO3_3moPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ NO3_3moPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0763 -0.3982 -0.0680  0.2768  5.0419 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)    -6.66682    0.03525 -189.143   <2e-16 ***
## NO3_3moPreSamp  0.01893    0.03149    0.601    0.548    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   1866.5      110.5   16.89   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  3619 on 3 Df
## Pseudo R-squared: 0.001384
## Number of iterations: 704 (BFGS) + 4 (Fisher scoring)
confint(dnam_model1)
##                       2.5 %      97.5 %
## (Intercept)      -6.7359016   -6.597734
## NO3_3moPreSamp   -0.0427905    0.080653
## (phi)          1649.9126823 2083.145562

No association between nitrate and global DNAm in this model.

Adjusted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ NO3_3moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ NO3_3moPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1357 -0.4020 -0.0571  0.2715  5.0451 
## 
## Coefficients (mean model with logit link):
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -6.6075956  0.2041558 -32.365   <2e-16 ***
## NO3_3moPreSamp  0.0182192  0.0315548   0.577    0.564    
## sexFemale      -0.0084106  0.0543709  -0.155    0.877    
## age_dx         -0.0004864  0.0028890  -0.168    0.866    
## smokeHxEver    -0.0367234  0.0488811  -0.751    0.452    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   1868.5      110.6   16.89   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  3619 on 6 Df
## Pseudo R-squared: 0.003666
## Number of iterations: 164 (BFGS) + 4 (Fisher scoring)
confint(dnam_model2)
##                        2.5 %        97.5 %
## (Intercept)    -7.007734e+00 -6.207458e+00
## NO3_3moPreSamp -4.362708e-02  8.006540e-02
## sexFemale      -1.149755e-01  9.815440e-02
## age_dx         -6.148821e-03  5.176041e-03
## smokeHxEver    -1.325286e-01  5.908184e-02
## (phi)           1.651622e+03  2.085287e+03

No association between nitrate and global DNAm in this model.

17.3.5 Continuous NO3 in 1mo Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ NO3_1moPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ NO3_1moPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0355 -0.4031 -0.0654  0.2738  4.9471 
## 
## Coefficients (mean model with logit link):
##                  Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)    -6.6369180  0.0367755 -180.471   <2e-16 ***
## NO3_1moPreSamp  0.0001452  0.0346114    0.004    0.997    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   1787.7      107.6   16.62   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  3499 on 3 Df
## Pseudo R-squared: 6.847e-08
## Number of iterations: 1906 (BFGS) + 2 (Fisher scoring)
confint(dnam_model1)
##                        2.5 %       97.5 %
## (Intercept)      -6.70899672   -6.5648393
## NO3_1moPreSamp   -0.06769194    0.0679824
## (phi)          1576.84017979 1998.4969771

No association between nitrate and global DNAm in this model.

Adjusted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ NO3_1moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ NO3_1moPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0818 -0.4059 -0.0679  0.2752  4.9490 
## 
## Coefficients (mean model with logit link):
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -6.5784931  0.2096448 -31.379   <2e-16 ***
## NO3_1moPreSamp  0.0005675  0.0346469   0.016    0.987    
## sexFemale      -0.0009629  0.0552040  -0.017    0.986    
## age_dx         -0.0005114  0.0029732  -0.172    0.863    
## smokeHxEver    -0.0365340  0.0504934  -0.724    0.469    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   1789.5      107.7   16.62   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  3499 on 6 Df
## Pseudo R-squared: 0.002213
## Number of iterations: 125 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
##                        2.5 %        97.5 %
## (Intercept)    -6.989389e+00 -6.167597e+00
## NO3_1moPreSamp -6.733931e-02  6.847422e-02
## sexFemale      -1.091607e-01  1.072349e-01
## age_dx         -6.338706e-03  5.315905e-03
## smokeHxEver    -1.354992e-01  6.243123e-02
## (phi)           1.578460e+03  2.000533e+03

No association between nitrate and global DNAm in this model.

17.4 NH4 Models

17.4.1 Continuous NH4 in 5yrs Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ NH4_5yrPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ NH4_5yrPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1488 -0.3940 -0.0575  0.2971  5.3882 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)    -6.78685    0.05145 -131.908   <2e-16 ***
## NH4_5yrPreSamp  0.17700    0.09601    1.844   0.0652 .  
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2150.2      115.7   18.58   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4402 on 3 Df
## Pseudo R-squared: 0.01036
## Number of iterations: 3623 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
##                       2.5 %       97.5 %
## (Intercept)      -6.8876896   -6.6860046
## NH4_5yrPreSamp   -0.0111714    0.3651737
## (phi)          1923.3501515 2377.0720027

Higher NH4 marginally associated with higher global DNAm in this model.

Adjusted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ NH4_5yrPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ NH4_5yrPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1685 -0.4036 -0.0593  0.2770  5.3962 
## 
## Coefficients (mean model with logit link):
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -6.7127980  0.1929334 -34.793   <2e-16 ***
## NH4_5yrPreSamp  0.1715980  0.0963726   1.781    0.075 .  
## sexFemale       0.0142767  0.0475909   0.300    0.764    
## age_dx         -0.0009185  0.0026048  -0.353    0.724    
## smokeHxEver    -0.0187735  0.0430170  -0.436    0.663    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2151.6      115.8   18.58   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4403 on 6 Df
## Pseudo R-squared: 0.01174
## Number of iterations: 150 (BFGS) + 2 (Fisher scoring)
confint(dnam_model2)
##                        2.5 %        97.5 %
## (Intercept)    -7.090941e+00 -6.334655e+00
## NH4_5yrPreSamp -1.728887e-02  3.604849e-01
## sexFemale      -7.899988e-02  1.075532e-01
## age_dx         -6.023946e-03  4.186848e-03
## smokeHxEver    -1.030853e-01  6.553827e-02
## (phi)           1.924606e+03  2.378614e+03

Higher NH4 marginally associated with higher global DNAm in this model.

17.4.2 Continuous NH4 in 1yrs Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ NH4_1yrPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ NH4_1yrPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.2285 -0.3894 -0.0526  0.2861  5.3791 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -6.77129    0.04509 -150.18   <2e-16 ***
## NH4_1yrPreSamp  0.25222    0.13935    1.81   0.0703 .  
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2140.0      115.5   18.52   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4377 on 3 Df
## Pseudo R-squared: 0.009959
## Number of iterations: 641 (BFGS) + 2 (Fisher scoring)
confint(dnam_model1)
##                        2.5 %       97.5 %
## (Intercept)      -6.85965816   -6.6829198
## NH4_1yrPreSamp   -0.02089795    0.5253479
## (phi)          1913.57989964 2366.4435174

Higher NH4 marginally associated with higher global DNAm in this model.

Adjusted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ NH4_1yrPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ NH4_1yrPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.2094 -0.3837 -0.0633  0.2862  5.3896 
## 
## Coefficients (mean model with logit link):
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -6.687778   0.189614 -35.271   <2e-16 ***
## NH4_1yrPreSamp  0.246605   0.139606   1.766   0.0773 .  
## sexFemale       0.015851   0.047791   0.332   0.7401    
## age_dx         -0.001085   0.002607  -0.416   0.6772    
## smokeHxEver    -0.017962   0.043140  -0.416   0.6771    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2141.6      115.6   18.52   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4377 on 6 Df
## Pseudo R-squared: 0.01164
## Number of iterations: 268 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
##                        2.5 %        97.5 %
## (Intercept)    -7.059414e+00 -6.316142e+00
## NH4_1yrPreSamp -2.701741e-02  5.202272e-01
## sexFemale      -7.781759e-02  1.095198e-01
## age_dx         -6.194239e-03  4.024194e-03
## smokeHxEver    -1.025148e-01  6.659066e-02
## (phi)           1.914987e+03  2.368172e+03

Higher NH4 marginally associated with higher global DNAm in this model.

17.4.3 Continuous NH4 in 6mo Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ NH4_6moPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ NH4_6moPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0588 -0.3997 -0.0670  0.2814  5.3270 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)    -6.72360    0.03929 -171.111   <2e-16 ***
## NH4_6moPreSamp  0.06665    0.10977    0.607    0.544    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2110.9      116.8   18.07   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4168 on 3 Df
## Pseudo R-squared: 0.001321
## Number of iterations: 1439 (BFGS) + 4 (Fisher scoring)
confint(dnam_model1)
##                       2.5 %       97.5 %
## (Intercept)      -6.8006102   -6.6465813
## NH4_6moPreSamp   -0.1484991    0.2817968
## (phi)          1881.9369985 2339.9046836

Higher NH4 not associated with global DNAm in this model.

Adjusted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ NH4_6moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ NH4_6moPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1161 -0.3919 -0.0731  0.2818  5.3425 
## 
## Coefficients (mean model with logit link):
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -6.611737   0.189560 -34.879   <2e-16 ***
## NH4_6moPreSamp  0.062263   0.110190   0.565    0.572    
## sexFemale       0.003808   0.049437   0.077    0.939    
## age_dx         -0.001384   0.002649  -0.523    0.601    
## smokeHxEver    -0.026053   0.044430  -0.586    0.558    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2113.0      116.9   18.07   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4168 on 6 Df
## Pseudo R-squared: 0.00347
## Number of iterations: 345 (BFGS) + 4 (Fisher scoring)
confint(dnam_model2)
##                        2.5 %        97.5 %
## (Intercept)    -6.983267e+00 -6.240207e+00
## NH4_6moPreSamp -1.537054e-01  2.782309e-01
## sexFemale      -9.308685e-02  1.007023e-01
## age_dx         -6.575586e-03  3.807315e-03
## smokeHxEver    -1.131343e-01  6.102747e-02
## (phi)           1.883758e+03  2.342154e+03

Higher NH4 not associated with global DNAm in this model.

17.4.4 Continuous NH4 in 3mo Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ NH4_3moPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ NH4_3moPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0794 -0.4001 -0.0674  0.2759  5.0393 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)    -6.66813    0.03927 -169.800   <2e-16 ***
## NH4_3moPreSamp  0.05752    0.10835    0.531    0.596    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   1866.3      110.5   16.89   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  3619 on 3 Df
## Pseudo R-squared: 0.001082
## Number of iterations: 1019 (BFGS) + 2 (Fisher scoring)
confint(dnam_model1)
##                      2.5 %       97.5 %
## (Intercept)      -6.745101   -6.5911634
## NH4_3moPreSamp   -0.154847    0.2698831
## (phi)          1649.688564 2082.8648333

Higher NH4 not associated with global DNAm in this model.

Adjusted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ NH4_3moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ NH4_3moPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1502 -0.4042 -0.0573  0.2652  5.0430 
## 
## Coefficients (mean model with logit link):
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -6.6084645  0.2051993 -32.205   <2e-16 ***
## NH4_3moPreSamp  0.0533942  0.1086784   0.491    0.623    
## sexFemale      -0.0088638  0.0543484  -0.163    0.870    
## age_dx         -0.0004866  0.0028885  -0.168    0.866    
## smokeHxEver    -0.0362977  0.0488935  -0.742    0.458    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   1868.2      110.6   16.89   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  3619 on 6 Df
## Pseudo R-squared: 0.003351
## Number of iterations: 190 (BFGS) + 4 (Fisher scoring)
confint(dnam_model2)
##                        2.5 %        97.5 %
## (Intercept)    -7.010648e+00 -6.206281e+00
## NH4_3moPreSamp -1.596116e-01  2.664000e-01
## sexFemale      -1.153848e-01  9.765716e-02
## age_dx         -6.147989e-03  5.174696e-03
## smokeHxEver    -1.321273e-01  5.953186e-02
## (phi)           1.651356e+03  2.084953e+03

Higher NH4 not associated with global DNAm in this model.

17.4.5 Continuous NH4 in 1mo Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ NH4_1moPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ NH4_1moPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0354 -0.4029 -0.0651  0.2740  4.9468 
## 
## Coefficients (mean model with logit link):
##                 Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)    -6.637143   0.039647 -167.408   <2e-16 ***
## NH4_1moPreSamp  0.001264   0.111966    0.011    0.991    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   1787.7      107.6   16.62   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  3499 on 3 Df
## Pseudo R-squared: 5.137e-07
## Number of iterations: 270 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
##                       2.5 %       97.5 %
## (Intercept)      -6.7148489   -6.5594372
## NH4_1moPreSamp   -0.2181853    0.2207123
## (phi)          1576.8404943 1998.4973724

Higher NH4 not associated with global DNAm in this model.

Adjusted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ NH4_1moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ NH4_1moPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0807 -0.4062 -0.0683  0.2747  4.9488 
## 
## Coefficients (mean model with logit link):
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -6.5777754  0.2101242 -31.304   <2e-16 ***
## NH4_1moPreSamp -0.0004349  0.1121467  -0.004    0.997    
## sexFemale      -0.0009723  0.0552015  -0.018    0.986    
## age_dx         -0.0005143  0.0029722  -0.173    0.863    
## smokeHxEver    -0.0365109  0.0504852  -0.723    0.470    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   1789.5      107.7   16.62   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  3499 on 6 Df
## Pseudo R-squared: 0.002212
## Number of iterations: 203 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
##                        2.5 %        97.5 %
## (Intercept)    -6.989611e+00 -6.165939e+00
## NH4_1moPreSamp -2.202384e-01  2.193686e-01
## sexFemale      -1.091653e-01  1.072207e-01
## age_dx         -6.339663e-03  5.311005e-03
## smokeHxEver    -1.354600e-01  6.243819e-02
## (phi)           1.578459e+03  2.000532e+03

Higher NH4 not associated with global DNAm in this model.

17.5 BC Models

17.5.1 Continuous BC in 5yrs Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ BC_5yrPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ BC_5yrPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0762 -0.3996 -0.0655  0.2816  5.3468 
## 
## Coefficients (mean model with logit link):
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.73713    0.06805 -99.002   <2e-16 ***
## BC_5yrPreSamp  0.05666    0.10426   0.543    0.587    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2140.8      115.3   18.57   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4401 on 3 Df
## Pseudo R-squared: 0.0008914
## Number of iterations: 417 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
##                      2.5 %       97.5 %
## (Intercept)     -6.8705094   -6.6037560
## BC_5yrPreSamp   -0.1476886    0.2610144
## (phi)         1914.8608419 2366.6480173

No significant association between BC_5yrPreSamp and prop_5mC in this model.

Adjusted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ BC_5yrPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ BC_5yrPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1240 -0.4023 -0.0760  0.2732  5.3592 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.637310   0.194808 -34.071   <2e-16 ***
## BC_5yrPreSamp  0.059213   0.104343   0.567    0.570    
## sexFemale      0.016935   0.047714   0.355    0.723    
## age_dx        -0.001340   0.002599  -0.516    0.606    
## smokeHxEver   -0.021330   0.043039  -0.496    0.620    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2142.9      115.4   18.57   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4401 on 6 Df
## Pseudo R-squared: 0.003019
## Number of iterations: 303 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
##                       2.5 %        97.5 %
## (Intercept)   -7.019126e+00 -6.255494e+00
## BC_5yrPreSamp -1.452962e-01  2.637227e-01
## sexFemale     -7.658207e-02  1.104526e-01
## age_dx        -6.433826e-03  3.753028e-03
## smokeHxEver   -1.056842e-01  6.302484e-02
## (phi)          1.916831e+03  2.369067e+03

No significant association between BC_5yrPreSamp and prop_5mC in this model.

17.5.2 Continuous BC in 1yrs Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ BC_1yrPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ BC_1yrPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0738 -0.3922 -0.0673  0.2754  5.3467 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.704222   0.073541  -91.16   <2e-16 ***
## BC_1yrPreSamp  0.004687   0.117743    0.04    0.968    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)     2130        115   18.52   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4375 on 3 Df
## Pseudo R-squared: 5.032e-06
## Number of iterations: 634 (BFGS) + 4 (Fisher scoring)
confint(dnam_model1)
##                      2.5 %       97.5 %
## (Intercept)     -6.8483601   -6.5600841
## BC_1yrPreSamp   -0.2260847    0.2354577
## (phi)         1904.6991624 2355.5331992

No significant association between BC_1yrPreSamp and prop_5mC in this model.

Adjusted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ BC_1yrPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ BC_1yrPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1226 -0.3988 -0.0727  0.2705  5.3595 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.603723   0.196535 -33.601   <2e-16 ***
## BC_1yrPreSamp  0.005677   0.117746   0.048    0.962    
## sexFemale      0.015400   0.047872   0.322    0.748    
## age_dx        -0.001330   0.002606  -0.510    0.610    
## smokeHxEver   -0.021433   0.043186  -0.496    0.620    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2132.2      115.1   18.52   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4376 on 6 Df
## Pseudo R-squared: 0.002069
## Number of iterations: 216 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
##                       2.5 %        97.5 %
## (Intercept)   -6.988925e+00 -6.218520e+00
## BC_1yrPreSamp -2.251022e-01  2.364556e-01
## sexFemale     -7.842704e-02  1.092270e-01
## age_dx        -6.438391e-03  3.778581e-03
## smokeHxEver   -1.060762e-01  6.320951e-02
## (phi)          1.906566e+03  2.357827e+03

No significant association between BC_1yrPreSamp and prop_5mC in this model.

17.5.3 Continuous BC in 6mo Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ BC_6moPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ BC_6moPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0733 -0.3898 -0.0622  0.2730  5.3295 
## 
## Coefficients (mean model with logit link):
##               Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)   -6.67222    0.06107 -109.247   <2e-16 ***
## BC_6moPreSamp -0.05189    0.09080   -0.571    0.568    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2110.8      116.8   18.07   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4168 on 3 Df
## Pseudo R-squared: 0.001181
## Number of iterations: 417 (BFGS) + 4 (Fisher scoring)
confint(dnam_model1)
##                      2.5 %       97.5 %
## (Intercept)     -6.7919213   -6.5525135
## BC_6moPreSamp   -0.2298576    0.1260793
## (phi)         1881.8241353 2339.7654183

No significant association between BC_6moPreSamp and prop_5mC in this model.

Adjusted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ BC_6moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ BC_6moPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0892 -0.3864 -0.0644  0.2765  5.3422 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.568496   0.193328 -33.976   <2e-16 ***
## BC_6moPreSamp -0.049928   0.091005  -0.549    0.583    
## sexFemale      0.002725   0.049446   0.055    0.956    
## age_dx        -0.001273   0.002652  -0.480    0.631    
## smokeHxEver   -0.028711   0.044361  -0.647    0.517    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2112.9      116.9   18.07   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4168 on 6 Df
## Pseudo R-squared: 0.003391
## Number of iterations: 242 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
##                       2.5 %        97.5 %
## (Intercept)     -6.94741302 -6.189580e+00
## BC_6moPreSamp   -0.22829472  1.284384e-01
## sexFemale       -0.09418784  9.963843e-02
## age_dx          -0.00647144  3.925068e-03
## smokeHxEver     -0.11565760  5.823464e-02
## (phi)         1883.72066410  2.342107e+03

No significant association between BC_6moPreSamp and prop_5mC in this model.

17.5.4 Continuous BC in 3mo Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ BC_3moPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ BC_3moPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1362 -0.3898 -0.0609  0.2757  5.0379 
## 
## Coefficients (mean model with logit link):
##               Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)   -6.69549    0.06489 -103.181   <2e-16 ***
## BC_3moPreSamp  0.07272    0.10040    0.724    0.469    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   1867.1      110.6   16.89   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  3619 on 3 Df
## Pseudo R-squared: 0.002054
## Number of iterations: 1264 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
##                      2.5 %       97.5 %
## (Intercept)     -6.8226739   -6.5683068
## BC_3moPreSamp   -0.1240575    0.2694969
## (phi)         1650.3998993 2083.7559858

No significant association between BC_3moPreSamp and prop_5mC in this model.

Adjusted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ BC_3moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ BC_3moPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.2169 -0.3994 -0.0541  0.2852  5.0416 
## 
## Coefficients (mean model with logit link):
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.6333093  0.2109832 -31.440   <2e-16 ***
## BC_3moPreSamp  0.0747411  0.1004006   0.744    0.457    
## sexFemale     -0.0099368  0.0543158  -0.183    0.855    
## age_dx        -0.0005352  0.0028855  -0.185    0.853    
## smokeHxEver   -0.0382629  0.0488702  -0.783    0.434    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   1869.2      110.7   16.89   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  3619 on 6 Df
## Pseudo R-squared: 0.004468
## Number of iterations: 242 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
##                       2.5 %        97.5 %
## (Intercept)   -7.046829e+00 -6.219790e+00
## BC_3moPreSamp -1.220405e-01  2.715228e-01
## sexFemale     -1.163939e-01  9.652030e-02
## age_dx        -6.190719e-03  5.120365e-03
## smokeHxEver   -1.340468e-01  5.752102e-02
## (phi)          1.652287e+03  2.086121e+03

No significant association between BC_3moPreSamp and prop_5mC in this model.

17.5.5 Continuous BC in 1mo Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ BC_1moPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ BC_1moPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0184 -0.4017 -0.0608  0.2672  4.9455 
## 
## Coefficients (mean model with logit link):
##               Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)   -6.67446    0.06353 -105.061   <2e-16 ***
## BC_1moPreSamp  0.06410    0.09790    0.655    0.513    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   1789.0      107.6   16.62   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  3499 on 3 Df
## Pseudo R-squared: 0.001655
## Number of iterations: 1083 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
##                      2.5 %       97.5 %
## (Intercept)     -6.7989713   -6.5499406
## BC_1moPreSamp   -0.1277916    0.2559862
## (phi)         1578.0419781 2000.0078659

No significant association between BC_1moPreSamp and prop_5mC in this model.

Adjusted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ BC_1moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ BC_1moPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0378 -0.4050 -0.0573  0.2834  4.9465 
## 
## Coefficients (mean model with logit link):
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.6174022  0.2156676 -30.683   <2e-16 ***
## BC_1moPreSamp  0.0672957  0.0979361   0.687    0.492    
## sexFemale     -0.0002692  0.0551875  -0.005    0.996    
## age_dx        -0.0005044  0.0029693  -0.170    0.865    
## smokeHxEver   -0.0378730  0.0504746  -0.750    0.453    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   1791.0      107.8   16.62   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  3500 on 6 Df
## Pseudo R-squared: 0.003916
## Number of iterations: 196 (BFGS) + 4 (Fisher scoring)
confint(dnam_model2)
##                       2.5 %        97.5 %
## (Intercept)   -7.040103e+00 -6.194701e+00
## BC_1moPreSamp -1.246556e-01  2.592469e-01
## sexFemale     -1.084347e-01  1.078964e-01
## age_dx        -6.324207e-03  5.315352e-03
## smokeHxEver   -1.368014e-01  6.105546e-02
## (phi)          1.579796e+03  2.002213e+03

No significant association between BC_1moPreSamp and prop_5mC in this model.

17.6 OM Models

17.6.1 Continuous OM in 5yrs Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ OM_5yrPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ OM_5yrPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0751 -0.3923 -0.0665  0.2761  5.3599 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.705251   0.070833 -94.663   <2e-16 ***
## OM_5yrPreSamp  0.001038   0.023716   0.044    0.965    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2139.9      115.2   18.57   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4401 on 3 Df
## Pseudo R-squared: 5.956e-06
## Number of iterations: 620 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
##                       2.5 %       97.5 %
## (Intercept)     -6.84408095   -6.5664218
## OM_5yrPreSamp   -0.04544473    0.0475205
## (phi)         1914.06415810 2365.6697541

No significant association between OM_5yrPreSamp and prop_5mC in this model.

Adjusted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ OM_5yrPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ OM_5yrPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1242 -0.3995 -0.0772  0.2704  5.3726 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.605964   0.196330 -33.647   <2e-16 ***
## OM_5yrPreSamp  0.001672   0.023746   0.070    0.944    
## sexFemale      0.015779   0.047752   0.330    0.741    
## age_dx        -0.001330   0.002600  -0.512    0.609    
## smokeHxEver   -0.021521   0.043042  -0.500    0.617    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2142.0      115.3   18.57   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4401 on 6 Df
## Pseudo R-squared: 0.0021
## Number of iterations: 112 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
##                       2.5 %        97.5 %
## (Intercept)   -6.990764e+00 -6.221163e+00
## OM_5yrPreSamp -4.486938e-02  4.821437e-02
## sexFemale     -7.781207e-02  1.093708e-01
## age_dx        -6.425473e-03  3.765746e-03
## smokeHxEver   -1.058823e-01  6.284053e-02
## (phi)          1.915965e+03  2.368004e+03

No significant association between OM_5yrPreSamp and prop_5mC in this model.

17.6.2 Continuous OM in 1yrs Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ OM_1yrPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ OM_1yrPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0732 -0.3830 -0.0651  0.2788  5.3650 
## 
## Coefficients (mean model with logit link):
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.66790    0.06916 -96.413   <2e-16 ***
## OM_1yrPreSamp -0.01111    0.02168  -0.513    0.608    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2130.9      115.1   18.52   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4376 on 3 Df
## Pseudo R-squared: 0.0008603
## Number of iterations: 88 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
##                       2.5 %       97.5 %
## (Intercept)     -6.80345291   -6.5323517
## OM_1yrPreSamp   -0.05361151    0.0313841
## (phi)         1905.43479829 2356.4369761

No significant association between OM_1yrPreSamp and prop_5mC in this model.

Adjusted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ OM_1yrPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ OM_1yrPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1018 -0.3876 -0.0690  0.2764  5.3784 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.567592   0.196053 -33.499   <2e-16 ***
## OM_1yrPreSamp -0.010790   0.021721  -0.497    0.619    
## sexFemale      0.014044   0.047938   0.293    0.770    
## age_dx        -0.001325   0.002607  -0.508    0.611    
## smokeHxEver   -0.021765   0.043166  -0.504    0.614    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2133.0      115.2   18.52   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4376 on 6 Df
## Pseudo R-squared: 0.002871
## Number of iterations: 346 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
##                       2.5 %        97.5 %
## (Intercept)   -6.951848e+00 -6.183336e+00
## OM_1yrPreSamp -5.336236e-02  3.178289e-02
## sexFemale     -7.991235e-02  1.080005e-01
## age_dx        -6.433739e-03  3.784272e-03
## smokeHxEver   -1.063690e-01  6.283806e-02
## (phi)          1.907254e+03  2.358672e+03

No significant association between OM_1yrPreSamp and prop_5mC in this model.

17.6.3 Continuous OM in 6mo Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ OM_6moPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ OM_6moPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0752 -0.3761 -0.0529  0.2772  5.3540 
## 
## Coefficients (mean model with logit link):
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.64928    0.05982  -111.2   <2e-16 ***
## OM_6moPreSamp -0.01760    0.01761    -1.0    0.318    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2113.0      116.9   18.07   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4168 on 3 Df
## Pseudo R-squared: 0.003534
## Number of iterations: 740 (BFGS) + 4 (Fisher scoring)
confint(dnam_model1)
##                       2.5 %        97.5 %
## (Intercept)     -6.76652082   -6.53203180
## OM_6moPreSamp   -0.05210588    0.01690988
## (phi)         1883.79643121 2342.20111613

No significant association between OM_6moPreSamp and prop_5mC in this model.

Adjusted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ OM_6moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ OM_6moPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0727 -0.3803 -0.0619  0.2743  5.3683 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.543505   0.194078 -33.716   <2e-16 ***
## OM_6moPreSamp -0.017468   0.017669  -0.989    0.323    
## sexFemale     -0.000434   0.049536  -0.009    0.993    
## age_dx        -0.001274   0.002650  -0.481    0.631    
## smokeHxEver   -0.029316   0.044326  -0.661    0.508    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2115.1      117.1   18.07   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4169 on 6 Df
## Pseudo R-squared: 0.00567
## Number of iterations: 242 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
##                       2.5 %        97.5 %
## (Intercept)     -6.92389028 -6.163120e+00
## OM_6moPreSamp   -0.05209857  1.716263e-02
## sexFemale       -0.09752197  9.665405e-02
## age_dx          -0.00646855  3.920306e-03
## smokeHxEver     -0.11619313  5.756049e-02
## (phi)         1885.70778639  2.344562e+03

No significant association between OM_6moPreSamp and prop_5mC in this model.

17.6.4 Continuous OM in 3mo Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ OM_3moPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ OM_3moPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0648 -0.3936 -0.0608  0.2796  5.0418 
## 
## Coefficients (mean model with logit link):
##                 Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)   -6.6553839  0.0621073 -107.159   <2e-16 ***
## OM_3moPreSamp  0.0009425  0.0186360    0.051     0.96    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   1865.4      110.5   16.89   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  3619 on 3 Df
## Pseudo R-squared: 1.05e-05
## Number of iterations: 263 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
##                       2.5 %        97.5 %
## (Intercept)     -6.77711203   -6.53365578
## OM_3moPreSamp   -0.03558341    0.03746848
## (phi)         1648.89342679 2081.86878297

No significant association between OM_3moPreSamp and prop_5mC in this model.

Adjusted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ OM_3moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ OM_3moPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1402 -0.3956 -0.0626  0.2714  5.0461 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.592241   0.210304 -31.346   <2e-16 ***
## OM_3moPreSamp  0.001130   0.018654   0.061    0.952    
## sexFemale     -0.009525   0.054375  -0.175    0.861    
## age_dx        -0.000550   0.002887  -0.191    0.849    
## smokeHxEver   -0.037345   0.048899  -0.764    0.445    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   1867.4      110.6   16.89   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  3619 on 6 Df
## Pseudo R-squared: 0.002389
## Number of iterations: 164 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
##                       2.5 %        97.5 %
## (Intercept)   -7.004429e+00 -6.180054e+00
## OM_3moPreSamp -3.543155e-02  3.769071e-02
## sexFemale     -1.160969e-01  9.704770e-02
## age_dx        -6.208551e-03  5.108466e-03
## smokeHxEver   -1.331857e-01  5.849539e-02
## (phi)          1.650690e+03  2.084119e+03

No significant association between OM_3moPreSamp and prop_5mC in this model.

17.6.5 Continuous OM in 1mo Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ OM_1moPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ OM_1moPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0359 -0.4023 -0.0649  0.2739  4.9475 
## 
## Coefficients (mean model with logit link):
##                 Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)   -6.6360060  0.0602069 -110.220   <2e-16 ***
## OM_1moPreSamp -0.0002649  0.0177554   -0.015    0.988    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   1787.7      107.6   16.62   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  3499 on 3 Df
## Pseudo R-squared: 9.544e-07
## Number of iterations: 270 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
##                       2.5 %        97.5 %
## (Intercept)     -6.75400939   -6.51800259
## OM_1moPreSamp   -0.03506479    0.03453505
## (phi)         1576.84078287 1998.49773516

No significant association between OM_1moPreSamp and prop_5mC in this model.

Adjusted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ OM_1moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ OM_1moPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0815 -0.4056 -0.0684  0.2754  4.9492 
## 
## Coefficients (mean model with logit link):
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.5786296  0.2144246 -30.680   <2e-16 ***
## OM_1moPreSamp  0.0002388  0.0177909   0.013    0.989    
## sexFemale     -0.0009271  0.0552865  -0.017    0.987    
## age_dx        -0.0005141  0.0029707  -0.173    0.863    
## smokeHxEver   -0.0365293  0.0505062  -0.723    0.470    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   1789.5      107.7   16.62   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  3499 on 6 Df
## Pseudo R-squared: 0.002213
## Number of iterations: 137 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
##                       2.5 %        97.5 %
## (Intercept)     -6.99889405 -6.158365e+00
## OM_1moPreSamp   -0.03463082  3.510837e-02
## sexFemale       -0.10928656  1.074324e-01
## age_dx          -0.00633657  5.308324e-03
## smokeHxEver     -0.13551959  6.246097e-02
## (phi)         1578.45965171  2.000533e+03

No significant association between OM_1moPreSamp and prop_5mC in this model.

17.7 SS Models

17.7.1 Continuous SS in 5yrs Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ SS_5yrPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ SS_5yrPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0720 -0.3899 -0.0578  0.2834  5.3641 
## 
## Coefficients (mean model with logit link):
##               Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)   -6.69654    0.02985 -224.373   <2e-16 ***
## SS_5yrPreSamp -0.01611    0.05450   -0.296    0.768    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2140.1      115.2   18.57   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4401 on 3 Df
## Pseudo R-squared: 0.0002811
## Number of iterations: 1089 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
##                      2.5 %        97.5 %
## (Intercept)     -6.7550382   -6.63804567
## SS_5yrPreSamp   -0.1229363    0.09071838
## (phi)         1914.3040988 2365.96438065

No significant association between SS_5yrPreSamp and prop_5mC in this model.

Adjusted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ SS_5yrPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ SS_5yrPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1199 -0.4043 -0.0707  0.2717  5.3763 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.598356   0.183839 -35.892   <2e-16 ***
## SS_5yrPreSamp -0.013103   0.054635  -0.240    0.810    
## sexFemale      0.015320   0.047691   0.321    0.748    
## age_dx        -0.001307   0.002604  -0.502    0.616    
## smokeHxEver   -0.020985   0.043096  -0.487    0.626    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2142.2      115.3   18.57   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4401 on 6 Df
## Pseudo R-squared: 0.002256
## Number of iterations: 242 (BFGS) + 4 (Fisher scoring)
confint(dnam_model2)
##                       2.5 %        97.5 %
## (Intercept)   -6.958675e+00 -6.238037e+00
## SS_5yrPreSamp -1.201858e-01  9.397972e-02
## sexFemale     -7.815213e-02  1.087927e-01
## age_dx        -6.409553e-03  3.796226e-03
## smokeHxEver   -1.054509e-01  6.348041e-02
## (phi)          1.916116e+03  2.368189e+03

No significant association between SS_5yrPreSamp and prop_5mC in this model.

17.7.2 Continuous SS in 1yrs Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ SS_1yrPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ SS_1yrPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0718 -0.3909 -0.0585  0.2850  5.3497 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)   -6.697963   0.028850 -232.166   <2e-16 ***
## SS_1yrPreSamp -0.008935   0.045803   -0.195    0.845    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)     2130        115   18.52   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4375 on 3 Df
## Pseudo R-squared: 0.000129
## Number of iterations: 2391 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
##                       2.5 %        97.5 %
## (Intercept)     -6.75450808   -6.64141866
## SS_1yrPreSamp   -0.09870806    0.08083783
## (phi)         1904.80359011 2355.66149111

No significant association between SS_1yrPreSamp and prop_5mC in this model.

Adjusted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ SS_1yrPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ SS_1yrPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1197 -0.4005 -0.0724  0.2704  5.3623 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.598585   0.184376 -35.789   <2e-16 ***
## SS_1yrPreSamp -0.006818   0.045881  -0.149    0.882    
## sexFemale      0.015233   0.047892   0.318    0.750    
## age_dx        -0.001319   0.002608  -0.506    0.613    
## smokeHxEver   -0.021110   0.043237  -0.488    0.625    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2132.3      115.1   18.52   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4376 on 6 Df
## Pseudo R-squared: 0.002132
## Number of iterations: 320 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
##                       2.5 %        97.5 %
## (Intercept)   -6.959956e+00 -6.237214e+00
## SS_1yrPreSamp -9.674419e-02  8.310787e-02
## sexFemale     -7.863385e-02  1.090989e-01
## age_dx        -6.430609e-03  3.791821e-03
## smokeHxEver   -1.058518e-01  6.363260e-02
## (phi)          1.906624e+03  2.357898e+03

No significant association between SS_1yrPreSamp and prop_5mC in this model.

17.7.3 Continuous SS in 6mo Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ SS_6moPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ SS_6moPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1467 -0.3920 -0.0622  0.2813  5.3081 
## 
## Coefficients (mean model with logit link):
##               Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)   -6.71625    0.02873 -233.751   <2e-16 ***
## SS_6moPreSamp  0.03040    0.04273    0.711    0.477    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2111.3      116.9   18.07   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4168 on 3 Df
## Pseudo R-squared: 0.001616
## Number of iterations: 3343 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
##                       2.5 %       97.5 %
## (Intercept)     -6.77256322   -6.6599338
## SS_6moPreSamp   -0.05335769    0.1141513
## (phi)         1882.26208737 2340.3062579

No significant association between SS_6moPreSamp and prop_5mC in this model.

Adjusted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ SS_6moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ SS_6moPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.2824 -0.3913 -0.0672  0.2802  5.3216 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.604437   0.187505 -35.223   <2e-16 ***
## SS_6moPreSamp  0.032491   0.042805   0.759    0.448    
## sexFemale      0.004555   0.049435   0.092    0.927    
## age_dx        -0.001379   0.002647  -0.521    0.602    
## smokeHxEver   -0.030107   0.044404  -0.678    0.498    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)     2114        117   18.07   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4168 on 6 Df
## Pseudo R-squared: 0.004115
## Number of iterations: 221 (BFGS) + 2 (Fisher scoring)
confint(dnam_model2)
##                       2.5 %        97.5 %
## (Intercept)   -6.971940e+00 -6.236934e+00
## SS_6moPreSamp -5.140560e-02  1.163868e-01
## sexFemale     -9.233491e-02  1.014454e-01
## age_dx        -6.567051e-03  3.809361e-03
## smokeHxEver   -1.171359e-01  5.692263e-02
## (phi)          1.884436e+03  2.342990e+03

No significant association between SS_6moPreSamp and prop_5mC in this model.

17.7.4 Continuous SS in 3mo Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ SS_3moPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ SS_3moPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0509 -0.3861 -0.0626  0.2710  5.0370 
## 
## Coefficients (mean model with logit link):
##               Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)   -6.65866    0.03094 -215.183   <2e-16 ***
## SS_3moPreSamp  0.01610    0.04427    0.364    0.716    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   1865.8      110.5   16.89   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  3619 on 3 Df
## Pseudo R-squared: 0.0004941
## Number of iterations: 1096 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
##                       2.5 %       97.5 %
## (Intercept)     -6.71930994   -6.5980108
## SS_3moPreSamp   -0.07066867    0.1028688
## (phi)         1649.25840942 2082.3260149

No significant association between SS_3moPreSamp and prop_5mC in this model.

Adjusted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ SS_3moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ SS_3moPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1411 -0.3960 -0.0554  0.2635  5.0402 
## 
## Coefficients (mean model with logit link):
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.5932796  0.2021169 -32.621   <2e-16 ***
## SS_3moPreSamp  0.0187923  0.0443956   0.423    0.672    
## sexFemale     -0.0095203  0.0543244  -0.175    0.861    
## age_dx        -0.0005766  0.0028861  -0.200    0.842    
## smokeHxEver   -0.0387379  0.0490000  -0.791    0.429    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   1868.0      110.6   16.89   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  3619 on 6 Df
## Pseudo R-squared: 0.003035
## Number of iterations: 183 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
##                       2.5 %        97.5 %
## (Intercept)   -6.989421e+00 -6.197138e+00
## SS_3moPreSamp -6.822136e-02  1.058061e-01
## sexFemale     -1.159941e-01  9.695354e-02
## age_dx        -6.233257e-03  5.080085e-03
## smokeHxEver   -1.347763e-01  5.730040e-02
## (phi)          1.651183e+03  2.084737e+03

No significant association between SS_3moPreSamp and prop_5mC in this model.

17.7.5 Continuous SS in 1mo Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ SS_1moPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ SS_1moPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0353 -0.4032 -0.0662  0.2776  4.9484 
## 
## Coefficients (mean model with logit link):
##                Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)   -6.635967   0.031196 -212.721   <2e-16 ***
## SS_1moPreSamp -0.002166   0.041731   -0.052    0.959    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   1787.7      107.6   16.62   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  3499 on 3 Df
## Pseudo R-squared: 1.061e-05
## Number of iterations: 718 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
##                       2.5 %        97.5 %
## (Intercept)     -6.69710948   -6.57482463
## SS_1moPreSamp   -0.08395841    0.07962586
## (phi)         1576.84776623 1998.50651478

No significant association between SS_1moPreSamp and prop_5mC in this model.

Adjusted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ SS_1moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ SS_1moPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0840 -0.4058 -0.0683  0.2750  4.9495 
## 
## Coefficients (mean model with logit link):
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -6.578e+00  2.072e-01 -31.745   <2e-16 ***
## SS_1moPreSamp  1.913e-05  4.184e-02   0.000    1.000    
## sexFemale     -9.694e-04  5.521e-02  -0.018    0.986    
## age_dx        -5.139e-04  2.971e-03  -0.173    0.863    
## smokeHxEver   -3.651e-02  5.058e-02  -0.722    0.470    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   1789.5      107.7   16.62   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  3499 on 6 Df
## Pseudo R-squared: 0.002212
## Number of iterations: 163 (BFGS) + 2 (Fisher scoring)
confint(dnam_model2)
##                       2.5 %        97.5 %
## (Intercept)   -6.984053e+00 -6.171799e+00
## SS_1moPreSamp -8.197974e-02  8.201800e-02
## sexFemale     -1.091700e-01  1.072311e-01
## age_dx        -6.337487e-03  5.309645e-03
## smokeHxEver   -1.356441e-01  6.262374e-02
## (phi)          1.578459e+03  2.000532e+03

No significant association between SS_1moPreSamp and prop_5mC in this model.

17.8 Soil Models

17.8.1 Continuous Soil in 5yrs Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ Soil_5yrPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ Soil_5yrPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0549 -0.3867 -0.0573  0.2892  5.3688 
## 
## Coefficients (mean model with logit link):
##                 Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)     -6.68287    0.04009 -166.679   <2e-16 ***
## Soil_5yrPreSamp -0.03231    0.05522   -0.585    0.559    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2140.9      115.3   18.57   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4401 on 3 Df
## Pseudo R-squared: 0.001072
## Number of iterations: 879 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
##                        2.5 %        97.5 %
## (Intercept)       -6.7614568   -6.60429057
## Soil_5yrPreSamp   -0.1405345    0.07592242
## (phi)           1915.0092460 2366.83027988

No significant association between Soil_5yrPreSamp and prop_5mC in this model.

Adjusted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ Soil_5yrPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ Soil_5yrPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1199 -0.3929 -0.0731  0.2755  5.3806 
## 
## Coefficients (mean model with logit link):
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -6.588855   0.185056 -35.605   <2e-16 ***
## Soil_5yrPreSamp -0.029329   0.055365  -0.530    0.596    
## sexFemale        0.015518   0.047661   0.326    0.745    
## age_dx          -0.001264   0.002604  -0.485    0.627    
## smokeHxEver     -0.020285   0.043115  -0.470    0.638    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2142.8      115.4   18.57   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4401 on 6 Df
## Pseudo R-squared: 0.002947
## Number of iterations: 255 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
##                         2.5 %        97.5 %
## (Intercept)     -6.951558e+00 -6.226153e+00
## Soil_5yrPreSamp -1.378416e-01  7.918383e-02
## sexFemale       -7.789655e-02  1.089322e-01
## age_dx          -6.366476e-03  3.839217e-03
## smokeHxEver     -1.047890e-01  6.421995e-02
## (phi)            1.916735e+03  2.368949e+03

No significant association between Soil_5yrPreSamp and prop_5mC in this model.

17.8.2 Continuous Soil in 1yrs Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ Soil_1yrPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ Soil_1yrPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0661 -0.3868 -0.0650  0.2811  5.3484 
## 
## Coefficients (mean model with logit link):
##                  Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)     -6.696799   0.041699 -160.599   <2e-16 ***
## Soil_1yrPreSamp -0.007922   0.059754   -0.133    0.895    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)     2130        115   18.52   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4375 on 3 Df
## Pseudo R-squared: 5.759e-05
## Number of iterations: 578 (BFGS) + 5 (Fisher scoring)
confint(dnam_model1)
##                        2.5 %       97.5 %
## (Intercept)       -6.7785277   -6.6150711
## Soil_1yrPreSamp   -0.1250372    0.1091924
## (phi)           1904.7442495 2355.5885911

No significant association between Soil_1yrPreSamp and prop_5mC in this model.

Adjusted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ Soil_1yrPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ Soil_1yrPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1157 -0.3983 -0.0676  0.2707  5.3610 
## 
## Coefficients (mean model with logit link):
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -6.597476   0.186468 -35.381   <2e-16 ***
## Soil_1yrPreSamp -0.006170   0.059746  -0.103    0.918    
## sexFemale        0.015352   0.047873   0.321    0.748    
## age_dx          -0.001320   0.002607  -0.506    0.613    
## smokeHxEver     -0.021368   0.043188  -0.495    0.621    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2132.2      115.1   18.52   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4376 on 6 Df
## Pseudo R-squared: 0.0021
## Number of iterations: 294 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
##                         2.5 %        97.5 %
## (Intercept)     -6.962946e+00 -6.232005e+00
## Soil_1yrPreSamp -1.232706e-01  1.109301e-01
## sexFemale       -7.847653e-02  1.091805e-01
## age_dx          -6.429905e-03  3.790734e-03
## smokeHxEver     -1.060151e-01  6.327939e-02
## (phi)            1.906590e+03  2.357856e+03

No significant association between Soil_1yrPreSamp and prop_5mC in this model.

17.8.3 Continuous Soil in 6mo Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ Soil_6moPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ Soil_6moPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0559 -0.3802 -0.0641  0.2780  5.3166 
## 
## Coefficients (mean model with logit link):
##                  Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)     -6.701518   0.038543 -173.872   <2e-16 ***
## Soil_6moPreSamp -0.005203   0.053030   -0.098    0.922    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2109.7      116.8   18.07   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4168 on 3 Df
## Pseudo R-squared: 3.52e-05
## Number of iterations: 1019 (BFGS) + 3 (Fisher scoring)
confint(dnam_model1)
##                        2.5 %        97.5 %
## (Intercept)       -6.7770611   -6.62597566
## Soil_6moPreSamp   -0.1091401    0.09873448
## (phi)           1880.8874972 2338.60883090

No significant association between Soil_6moPreSamp and prop_5mC in this model.

Adjusted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ Soil_6moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ Soil_6moPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1220 -0.3952 -0.0687  0.2738  5.3309 
## 
## Coefficients (mean model with logit link):
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -6.591913   0.189159 -34.849   <2e-16 ***
## Soil_6moPreSamp -0.003896   0.053067  -0.073    0.941    
## sexFemale        0.003649   0.049457   0.074    0.941    
## age_dx          -0.001361   0.002649  -0.514    0.607    
## smokeHxEver     -0.028146   0.044359  -0.634    0.526    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   2111.9      116.9   18.07   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  4168 on 6 Df
## Pseudo R-squared: 0.002283
## Number of iterations: 196 (BFGS) + 2 (Fisher scoring)
confint(dnam_model2)
##                         2.5 %        97.5 %
## (Intercept)     -6.962658e+00 -6.221168e+00
## Soil_6moPreSamp -1.079061e-01  1.001131e-01
## sexFemale       -9.328536e-02  1.005829e-01
## age_dx          -6.552341e-03  3.830855e-03
## smokeHxEver     -1.150878e-01  5.879679e-02
## (phi)            1.882855e+03  2.341039e+03

No significant association between Soil_6moPreSamp and prop_5mC in this model.

17.8.4 Continuous Soil in 3mo Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ Soil_3moPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ Soil_3moPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0520 -0.3909 -0.0637  0.2718  5.0391 
## 
## Coefficients (mean model with logit link):
##                 Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)     -6.66071    0.03806 -175.008   <2e-16 ***
## Soil_3moPreSamp  0.01380    0.04695    0.294    0.769    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   1865.7      110.5   16.89   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  3619 on 3 Df
## Pseudo R-squared: 0.0003832
## Number of iterations: 1145 (BFGS) + 2 (Fisher scoring)
confint(dnam_model1)
##                        2.5 %       97.5 %
## (Intercept)       -6.7353016   -6.5861115
## Soil_3moPreSamp   -0.0782205    0.1058194
## (phi)           1649.1480175 2082.1876444

No significant association between Soil_3moPreSamp and prop_5mC in this model.

Adjusted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ Soil_3moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ Soil_3moPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.1258 -0.3985 -0.0612  0.2637  5.0427 
## 
## Coefficients (mean model with logit link):
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -6.5988793  0.2045835 -32.255   <2e-16 ***
## Soil_3moPreSamp  0.0143826  0.0470619   0.306    0.760    
## sexFemale       -0.0088614  0.0544092  -0.163    0.871    
## age_dx          -0.0005267  0.0028875  -0.182    0.855    
## smokeHxEver     -0.0376970  0.0489020  -0.771    0.441    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   1867.7      110.6   16.89   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  3619 on 6 Df
## Pseudo R-squared: 0.002781
## Number of iterations: 274 (BFGS) + 5 (Fisher scoring)
confint(dnam_model2)
##                         2.5 %        97.5 %
## (Intercept)     -6.999856e+00 -6.197903e+00
## Soil_3moPreSamp -7.785708e-02  1.066223e-01
## sexFemale       -1.155015e-01  9.777870e-02
## age_dx          -6.186157e-03  5.132669e-03
## smokeHxEver     -1.335433e-01  5.814920e-02
## (phi)            1.650963e+03  2.084461e+03

No significant association between Soil_3moPreSamp and prop_5mC in this model.

17.8.5 Continuous Soil in 1mo Pre-Sampling

Unadjusted model

dnam_model1 <- betareg(prop_5mC ~ Soil_1moPreSamp, data=dnam)
summary(dnam_model1)
## 
## Call:
## betareg(formula = prop_5mC ~ Soil_1moPreSamp, data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0515 -0.3998 -0.0641  0.2723  4.9462 
## 
## Coefficients (mean model with logit link):
##                 Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)     -6.64390    0.03768 -176.345   <2e-16 ***
## Soil_1moPreSamp  0.01192    0.04470    0.267     0.79    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   1787.9      107.6   16.62   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  3499 on 3 Df
## Pseudo R-squared: 0.0003353
## Number of iterations: 1418 (BFGS) + 2 (Fisher scoring)
confint(dnam_model1)
##                         2.5 %        97.5 %
## (Intercept)       -6.71774017   -6.57005506
## Soil_1moPreSamp   -0.07569086    0.09952216
## (phi)           1577.05602332 1998.76822882

No significant association between Soil_1moPreSamp and prop_5mC in this model.

Adjusted model with age_dx, sex, smokeHx

dnam_model2 <- betareg(prop_5mC ~ Soil_1moPreSamp + sex + age_dx + smokeHx, data=dnam)
summary(dnam_model2)
## 
## Call:
## betareg(formula = prop_5mC ~ Soil_1moPreSamp + sex + age_dx + smokeHx, 
##     data = dnam)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.0707 -0.4040 -0.0658  0.2791  4.9473 
## 
## Coefficients (mean model with logit link):
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -6.587e+00  2.096e-01 -31.423   <2e-16 ***
## Soil_1moPreSamp  1.254e-02  4.485e-02   0.280    0.780    
## sexFemale        8.390e-06  5.531e-02   0.000    1.000    
## age_dx          -4.849e-04  2.972e-03  -0.163    0.870    
## smokeHxEver     -3.677e-02  5.050e-02  -0.728    0.466    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)   1789.8      107.7   16.62   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  3500 on 6 Df
## Pseudo R-squared: 0.002581
## Number of iterations: 176 (BFGS) + 3 (Fisher scoring)
confint(dnam_model2)
##                         2.5 %        97.5 %
## (Intercept)     -6.998324e+00 -6.176552e+00
## Soil_1moPreSamp -7.536798e-02  1.004392e-01
## sexFemale       -1.084026e-01  1.084194e-01
## age_dx          -6.309719e-03  5.339838e-03
## smokeHxEver     -1.357450e-01  6.220024e-02
## (phi)            1.578696e+03  2.000829e+03

No significant association between Soil_1moPreSamp and prop_5mC in this model.

18 Impact of DNAm on Mortality

Base model

coxPH_model1 <- coxph(Surv(time_DeathTxCensor, deadORtx==1) ~ pct_5mC, data=dnam, id=ID)
summary(coxPH_model1)
## Call:
## coxph(formula = Surv(time_DeathTxCensor, deadORtx == 1) ~ pct_5mC, 
##     data = dnam, id = ID)
## 
##   n= 746, number of events= 221 
## 
##            coef exp(coef) se(coef)      z Pr(>|z|)
## pct_5mC -0.1470    0.8633   0.4149 -0.354    0.723
## 
##         exp(coef) exp(-coef) lower .95 upper .95
## pct_5mC    0.8633      1.158    0.3828     1.947
## 
## Concordance= 0.563  (se = 0.022 )
## Likelihood ratio test= 0.14  on 1 df,   p=0.7
## Wald test            = 0.13  on 1 df,   p=0.7
## Score (logrank) test = 0.13  on 1 df,   p=0.7

No significant association between %5mC and mortality in this model.

Partial Model

coxPH_model2 <- coxph(Surv(time_DeathTxCensor, deadORtx==1) ~ pct_5mC + age_dx + sex + smokeHx, data=dnam, id=ID)
summary(coxPH_model2)
## Call:
## coxph(formula = Surv(time_DeathTxCensor, deadORtx == 1) ~ pct_5mC + 
##     age_dx + sex + smokeHx, data = dnam, id = ID)
## 
##   n= 746, number of events= 221 
## 
##                  coef exp(coef)  se(coef)      z Pr(>|z|)   
## pct_5mC     -0.179632  0.835578  0.387670 -0.463  0.64310   
## age_dx       0.024722  1.025030  0.009071  2.725  0.00642 **
## sexFemale   -0.280008  0.755778  0.167263 -1.674  0.09412 . 
## smokeHxEver  0.428896  1.535562  0.150184  2.856  0.00429 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##             exp(coef) exp(-coef) lower .95 upper .95
## pct_5mC        0.8356     1.1968    0.3908     1.786
## age_dx         1.0250     0.9756    1.0070     1.043
## sexFemale      0.7558     1.3231    0.5445     1.049
## smokeHxEver    1.5356     0.6512    1.1440     2.061
## 
## Concordance= 0.599  (se = 0.021 )
## Likelihood ratio test= 21.2  on 4 df,   p=3e-04
## Wald test            = 20.02  on 4 df,   p=5e-04
## Score (logrank) test = 20.17  on 4 df,   p=5e-04

No significant association between %5mC and mortality in this model.

Complete Model

coxPH_model3 <- coxph(Surv(time_DeathTxCensor, deadORtx==1) ~ pct_5mC + age_dx + sex + smokeHx + dich_Race + fvc_pct + dlco_pct, data=dnam, id=ID)
summary(coxPH_model3)
## Call:
## coxph(formula = Surv(time_DeathTxCensor, deadORtx == 1) ~ pct_5mC + 
##     age_dx + sex + smokeHx + dich_Race + fvc_pct + dlco_pct, 
##     data = dnam, id = ID)
## 
##   n= 637, number of events= 182 
##    (109 observations deleted due to missingness)
## 
##                         coef exp(coef)  se(coef)      z Pr(>|z|)    
## pct_5mC             0.283617  1.327924  0.374374  0.758   0.4487    
## age_dx              0.017992  1.018155  0.010130  1.776   0.0757 .  
## sexFemale          -0.242890  0.784358  0.187603 -1.295   0.1954    
## smokeHxEver         0.187012  1.205641  0.169357  1.104   0.2695    
## dich_RaceNon-White -0.039410  0.961356  0.330653 -0.119   0.9051    
## fvc_pct            -0.010389  0.989665  0.005351 -1.941   0.0522 .  
## dlco_pct           -0.047091  0.954001  0.006616 -7.118  1.1e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                    exp(coef) exp(-coef) lower .95 upper .95
## pct_5mC               1.3279     0.7531    0.6375    2.7659
## age_dx                1.0182     0.9822    0.9981    1.0386
## sexFemale             0.7844     1.2749    0.5430    1.1329
## smokeHxEver           1.2056     0.8294    0.8651    1.6803
## dich_RaceNon-White    0.9614     1.0402    0.5028    1.8380
## fvc_pct               0.9897     1.0104    0.9793    1.0001
## dlco_pct              0.9540     1.0482    0.9417    0.9665
## 
## Concordance= 0.728  (se = 0.019 )
## Likelihood ratio test= 98.07  on 7 df,   p=<2e-16
## Wald test            = 90.24  on 7 df,   p=<2e-16
## Score (logrank) test = 87.62  on 7 df,   p=4e-16

No significant association between %5mC and mortality in this model.

19 Impact of DNAm on Baseline FVC

Base Model

FVC_model1 <- lm(fvc_pct ~ pct_5mC, data=dnam)
summary(FVC_model1)
## 
## Call:
## lm(formula = fvc_pct ~ pct_5mC, data = dnam)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -44.491 -12.626  -0.945  11.722  52.299 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  68.2899     0.7317  93.327   <2e-16 ***
## pct_5mC      -1.9395     2.9459  -0.658    0.511    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.62 on 676 degrees of freedom
##   (68 observations deleted due to missingness)
## Multiple R-squared:  0.0006408,  Adjusted R-squared:  -0.0008375 
## F-statistic: 0.4335 on 1 and 676 DF,  p-value: 0.5105
confint(FVC_model1)
##                2.5 %    97.5 %
## (Intercept) 66.85321 69.726671
## pct_5mC     -7.72373  3.844698

No significant association between %5mC and baseline FVC.

Partial Model

FVC_model2 <- lm(fvc_pct ~ pct_5mC + sex + age_dx, data=dnam)
summary(FVC_model2)
## 
## Call:
## lm(formula = fvc_pct ~ pct_5mC + sex + age_dx, data = dnam)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -41.727 -11.494  -0.959  11.328  50.134 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 43.11370    5.53407   7.791 2.53e-14 ***
## pct_5mC     -1.35251    2.90813  -0.465    0.642    
## sexFemale    1.79770    1.45654   1.234    0.218    
## age_dx       0.35917    0.07903   4.545 6.52e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.39 on 674 degrees of freedom
##   (68 observations deleted due to missingness)
## Multiple R-squared:  0.03124,    Adjusted R-squared:  0.02693 
## F-statistic: 7.245 on 3 and 674 DF,  p-value: 8.664e-05
confint(FVC_model2)
##                 2.5 %     97.5 %
## (Intercept) 32.247610 53.9797940
## pct_5mC     -7.062587  4.3575710
## sexFemale   -1.062205  4.6576114
## age_dx       0.203992  0.5143502

No significant association between %5mC and baseline FVC.

Complete Model

FVC_model3 <- lm(fvc_pct ~ pct_5mC + sex + age_dx + dich_Race + smokeHx, data=dnam)
summary(FVC_model3)
## 
## Call:
## lm(formula = fvc_pct ~ pct_5mC + sex + age_dx + dich_Race + smokeHx, 
##     data = dnam)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -41.127 -11.729  -0.653  11.433  49.479 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        42.12376    5.59302   7.531 1.62e-13 ***
## pct_5mC            -1.42849    2.90506  -0.492    0.623    
## sexFemale           1.98176    1.46636   1.351    0.177    
## age_dx              0.36560    0.07905   4.625 4.50e-06 ***
## dich_RaceNon-White -4.67740    2.84535  -1.644    0.101    
## smokeHxEver         1.19281    1.31367   0.908    0.364    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.37 on 672 degrees of freedom
##   (68 observations deleted due to missingness)
## Multiple R-squared:  0.03636,    Adjusted R-squared:  0.02919 
## F-statistic: 5.072 on 5 and 672 DF,  p-value: 0.0001427
confint(FVC_model3)
##                          2.5 %     97.5 %
## (Intercept)         31.1418685 53.1056610
## pct_5mC             -7.1325846  4.2755963
## sexFemale           -0.8974351  4.8609471
## age_dx               0.2103857  0.5208138
## dich_RaceNon-White -10.2642367  0.9094403
## smokeHxEver         -1.3865802  3.7721931

No significant association between %5mC and baseline FVC.

20 Impact of DNAm on Baseline DLCO

Base Model

DLCO_model1 <- lm(dlco_pct ~ pct_5mC, data=dnam)
summary(DLCO_model1)
## 
## Call:
## lm(formula = dlco_pct ~ pct_5mC, data = dnam)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -32.180 -10.587  -1.157   8.675 128.215 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  40.5807     0.6932  58.544   <2e-16 ***
## pct_5mC       1.2988     2.7239   0.477    0.634    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.35 on 636 degrees of freedom
##   (108 observations deleted due to missingness)
## Multiple R-squared:  0.0003573,  Adjusted R-squared:  -0.001214 
## F-statistic: 0.2273 on 1 and 636 DF,  p-value: 0.6337
confint(DLCO_model1)
##                 2.5 %    97.5 %
## (Intercept) 39.219564 41.941931
## pct_5mC     -4.050202  6.647734

No significant association between %5mC and baseline DLCO

Partial Model

DLCO_model2 <- lm(dlco_pct ~ pct_5mC + sex + age_dx, data=dnam)
summary(DLCO_model2)
## 
## Call:
## lm(formula = dlco_pct ~ pct_5mC + sex + age_dx, data = dnam)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -31.675 -10.335  -1.376   8.650 128.622 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 43.98484    5.36943   8.192 1.42e-15 ***
## pct_5mC      1.30348    2.72686   0.478    0.633    
## sexFemale    1.62893    1.39816   1.165    0.244    
## age_dx      -0.05576    0.07672  -0.727    0.468    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.34 on 634 degrees of freedom
##   (108 observations deleted due to missingness)
## Multiple R-squared:  0.003633,   Adjusted R-squared:  -0.001081 
## F-statistic: 0.7707 on 3 and 634 DF,  p-value: 0.5107
confint(DLCO_model2)
##                  2.5 %      97.5 %
## (Intercept) 33.4408166 54.52886557
## pct_5mC     -4.0512810  6.65824801
## sexFemale   -1.1166552  4.37450659
## age_dx      -0.2064242  0.09490435

No significant association between %5mC and baseline DLCO

Complete Model

DLCO_model3 <- lm(dlco_pct ~ pct_5mC + sex + age_dx + dich_Race + smokeHx, data=dnam)
summary(DLCO_model3)
## 
## Call:
## lm(formula = dlco_pct ~ pct_5mC + sex + age_dx + dich_Race + 
##     smokeHx, data = dnam)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -31.586  -9.884  -0.975   7.828 128.148 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        47.01113    5.38189   8.735  < 2e-16 ***
## pct_5mC             1.39641    2.70181   0.517 0.605449    
## sexFemale           0.95389    1.39781   0.682 0.495224    
## age_dx             -0.05614    0.07611  -0.738 0.461048    
## dich_RaceNon-White  2.05384    2.72268   0.754 0.450923    
## smokeHxEver        -4.59330    1.26719  -3.625 0.000312 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.2 on 632 degrees of freedom
##   (108 observations deleted due to missingness)
## Multiple R-squared:  0.02509,    Adjusted R-squared:  0.01737 
## F-statistic: 3.253 on 5 and 632 DF,  p-value: 0.006578
confint(DLCO_model3)
##                         2.5 %      97.5 %
## (Intercept)        36.4425873 57.57968163
## pct_5mC            -3.9092120  6.70202324
## sexFemale          -1.7910254  3.69881159
## age_dx             -0.2055931  0.09332124
## dich_RaceNon-White -3.2927491  7.40042565
## smokeHxEver        -7.0817190 -2.10488804

No significant association between %5mC and baseline DLCO.